From fbe4808b6b73523d756bcebeb8ffb0e38b6a2084 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 Jan 2023 12:28:13 +0000 Subject: [PATCH 001/333] Add support for custom key in parallel replicas --- src/Core/Settings.h | 2 + src/Core/SettingsEnums.cpp | 4 ++ src/Core/SettingsEnums.h | 9 +++ src/Interpreters/Cluster.cpp | 1 - src/Interpreters/InterpreterSelectQuery.cpp | 15 +++++ .../MergeTree/MergeTreeDataSelectExecutor.cpp | 60 +++++++++++++++++-- src/Storages/SelectQueryInfo.h | 2 + 7 files changed, 86 insertions(+), 7 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2357948a1f6..a714fc449b7 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -153,6 +153,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \ M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ + M(String, parallel_replicas_custom_key, "", "Custom key for parallel replicas", 0) \ + M(ParallelReplicasMode, parallel_replicas_mode, ParallelReplicasMode::SAMPLE_KEY, "How to process query using multiple replicas.", 0) \ \ M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. 
It will work for any kind on MergeTree table.", 0) \ \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 632587106a1..eaeed9dc44c 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -162,4 +162,8 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, {"kusto", Dialect::kusto}}) + +IMPLEMENT_SETTING_ENUM(ParallelReplicasMode, ErrorCodes::BAD_ARGUMENTS, + {{"sample_key", ParallelReplicasMode::SAMPLE_KEY}, + {"custom_key", ParallelReplicasMode::CUSTOM_KEY}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 97c4275c4d2..a2041634d4b 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -191,4 +191,13 @@ enum class Dialect }; DECLARE_SETTING_ENUM(Dialect) + +enum class ParallelReplicasMode : uint8_t +{ + SAMPLE_KEY, + CUSTOM_KEY, +}; + +DECLARE_SETTING_ENUM(ParallelReplicasMode) + } diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index b76434b23e7..9f0a9d3b35c 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -509,7 +509,6 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, shard_local_addresses.push_back(replica); shard_all_addresses.push_back(replica); } - ConnectionPoolWithFailoverPtr shard_pool = std::make_shared( all_replicas_pools, settings.load_balancing, settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index ba2c845ead1..2d0436fa9d3 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -319,6 +319,15 @@ static ASTPtr parseAdditionalFilterConditionForTable( return nullptr; } +static ASTPtr parseParallelReplicaCustomKey(const String & setting, const Context & context) +{ + ParserExpression parser; + const auto & settings = context.getSettingsRef(); + return parseQuery( + parser, setting.data(), setting.data() + setting.size(), + "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); +} + /// Returns true if we should ignore quotas and limits for a specified table in the system database. static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) { @@ -501,6 +510,12 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.additional_filter_ast = parseAdditionalFilterConditionForTable( settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); + if (settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY && !settings.parallel_replicas_custom_key.value.empty()) + { + query_info.parallel_replica_custom_key_ast = parseParallelReplicaCustomKey( + settings.parallel_replicas_custom_key, *context); + } + auto analyze = [&] (bool try_move_to_prewhere) { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. 
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 1ca1779e4b0..3d48c333727 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -44,6 +45,8 @@ #include #include +#include +#include namespace DB { @@ -61,6 +64,7 @@ namespace ErrorCodes extern const int DUPLICATED_PART_UUIDS; extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int PROJECTION_NOT_USED; + extern const int BAD_ARGUMENTS; } @@ -470,6 +474,25 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( return plan; } +namespace +{ + +bool supportsSamplingForParallelReplicas(const SelectQueryInfo & select_query_info, const MergeTreeData & data, const Settings & settings) +{ + if (settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) + { + /// maybe just don't use sampling or try to fallback to SAMPLE_KEY? + if (select_query_info.parallel_replica_custom_key_ast == nullptr) + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but no 'parallel_replicas_custom_key' defined"); + + return true; + } + + return data.supportsSampling(); +} + +} + MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( const SelectQueryInfo & select_query_info, NamesAndTypesList available_real_columns, @@ -587,9 +610,10 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( * It is also important that the entire universe can be covered using SAMPLE 0.1 OFFSET 0, ... OFFSET 0.9 and similar decimals. */ + bool supports_sampling_for_parallel_replicas = supportsSamplingForParallelReplicas(select_query_info, data, settings); /// Parallel replicas has been requested but there is no way to sample data. /// Select all data from first replica and no data from other replicas. - if (settings.parallel_replicas_count > 1 && !data.supportsSampling() && settings.parallel_replica_offset > 0) + if (settings.parallel_replicas_count > 1 && !supports_sampling_for_parallel_replicas && settings.parallel_replica_offset > 0) { LOG_DEBUG(log, "Will use no data on this replica because parallel replicas processing has been requested" " (the setting 'max_parallel_replicas') but the table does not support sampling and this replica is not the first."); @@ -597,16 +621,34 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( return sampling; } - sampling.use_sampling = relative_sample_size > 0 || (settings.parallel_replicas_count > 1 && data.supportsSampling()); + sampling.use_sampling = relative_sample_size > 0 || (settings.parallel_replicas_count > 1 && supports_sampling_for_parallel_replicas); bool no_data = false; /// There is nothing left after sampling. 
+ std::optional parallel_replicas_custom_key_description; + const auto get_sampling_key = [&]() -> const KeyDescription & + { + if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) + { + assert(select_query_info.parallel_replica_custom_key_ast); + + LOG_INFO(log, "Using custom key for sampling while processing with multiple replicas"); + + if (!parallel_replicas_custom_key_description) + parallel_replicas_custom_key_description = KeyDescription::getKeyFromAST(select_query_info.parallel_replica_custom_key_ast, metadata_snapshot->columns, context); + + return *parallel_replicas_custom_key_description; + } + + return metadata_snapshot->getSamplingKey(); + }; + if (sampling.use_sampling) { if (sample_factor_column_queried && relative_sample_size != RelativeSize(0)) sampling.used_sample_factor = 1.0 / boost::rational_cast(relative_sample_size); RelativeSize size_of_universum = 0; - const auto & sampling_key = metadata_snapshot->getSamplingKey(); + const auto & sampling_key = get_sampling_key(); DataTypePtr sampling_column_type = sampling_key.data_types[0]; if (sampling_key.data_types.size() == 1) @@ -681,7 +723,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// If sample and final are used together no need to calculate sampling expression twice. /// The first time it was calculated for final, because sample key is a part of the PK. /// So, assume that we already have calculated column. - ASTPtr sampling_key_ast = metadata_snapshot->getSamplingKeyAST(); + ASTPtr sampling_key_ast = sampling_key.definition_ast; if (final) { @@ -693,8 +735,11 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( if (has_lower_limit) { if (!key_condition.addCondition( - sampling_key.column_names[0], Range::createLeftBounded(lower, true, sampling_key.data_types[0]->isNullable()))) + sampling_key.column_names[0], Range::createLeftBounded(lower, true, sampling_key.data_types[0]->isNullable())) + && (settings.parallel_replicas_count <= 1 || settings.parallel_replicas_mode == ParallelReplicasMode::SAMPLE_KEY)) + { throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); + } ASTPtr args = std::make_shared(); args->children.push_back(sampling_key_ast); @@ -711,8 +756,11 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( if (has_upper_limit) { if (!key_condition.addCondition( - sampling_key.column_names[0], Range::createRightBounded(upper, false, sampling_key.data_types[0]->isNullable()))) + sampling_key.column_names[0], Range::createRightBounded(upper, false, sampling_key.data_types[0]->isNullable())) + && (settings.parallel_replicas_count <= 1 || settings.parallel_replicas_mode == ParallelReplicasMode::SAMPLE_KEY)) + { throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); + } ASTPtr args = std::make_shared(); args->children.push_back(sampling_key_ast); diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index a8a8ae4e877..ec3af64d93a 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -209,6 +209,8 @@ struct SelectQueryInfo /// It is needed for PK analysis based on row_level_policy and additional_filters. 
ASTs filter_asts; + ASTPtr parallel_replica_custom_key_ast; + /// Filter actions dag for current storage ActionsDAGPtr filter_actions_dag; From 19e5988d3312194be35ab0d26c5c2966890a643a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 11 Jan 2023 09:57:13 +0000 Subject: [PATCH 002/333] Better --- src/Interpreters/InterpreterSelectQuery.cpp | 63 ++++++++++++++++--- src/Interpreters/InterpreterSelectQuery.h | 2 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 60 ++---------------- 3 files changed, 61 insertions(+), 64 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 2d0436fa9d3..1cd5d0004fa 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -510,10 +510,37 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.additional_filter_ast = parseAdditionalFilterConditionForTable( settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); - if (settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY && !settings.parallel_replicas_custom_key.value.empty()) + + ASTPtr parallel_replicas_custom_filter_ast = nullptr; + if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) { - query_info.parallel_replica_custom_key_ast = parseParallelReplicaCustomKey( + if (settings.parallel_replicas_custom_key.value.empty()) + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but 'parallel_replicas_custom_key' has no value"); + + auto custom_key_ast = parseParallelReplicaCustomKey( settings.parallel_replicas_custom_key, *context); + + // first we do modulo with replica count + ASTPtr args = std::make_shared(); + args->children.push_back(custom_key_ast); + args->children.push_back(std::make_shared(settings.parallel_replicas_count.value)); + + auto modulo_function = std::make_shared(); + modulo_function->name = "positiveModulo"; + modulo_function->arguments = args; + modulo_function->children.push_back(modulo_function->arguments); + + /// then we compare result to the current replica number (offset) + args = std::make_shared(); + args->children.push_back(modulo_function); + args->children.push_back(std::make_shared(settings.parallel_replica_offset.value)); + + auto equals_function = std::make_shared(); + equals_function->name = "equals"; + equals_function->arguments = args; + equals_function->children.push_back(equals_function->arguments); + + parallel_replicas_custom_filter_ast = equals_function; } auto analyze = [&] (bool try_move_to_prewhere) @@ -653,6 +680,16 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.filter_asts.push_back(query_info.additional_filter_ast); } + if (parallel_replicas_custom_filter_ast) + { + custom_key_filter_info = generateFilterActions( + table_id, parallel_replicas_custom_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns, + prepared_sets); + + custom_key_filter_info->do_remove_column = true; + query_info.filter_asts.push_back(parallel_replicas_custom_filter_ast); + } + source_header = storage_snapshot->getSampleBlockForColumns(required_columns); } @@ -1396,17 +1433,23 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( + auto filter_step = std::make_unique( query_plan.getCurrentDataStream(), - additional_filter_info->actions, - additional_filter_info->column_name, - additional_filter_info->do_remove_column); + new_filter_info->actions, + new_filter_info->column_name, + new_filter_info->do_remove_column); - additional_filter_step->setStepDescription("Additional filter"); - query_plan.addStep(std::move(additional_filter_step)); - } + filter_step->setStepDescription(description); + query_plan.addStep(std::move(filter_step)); + }; + + if (additional_filter_info) + add_filter_step(additional_filter_info, "Additional filter"); + + if (custom_key_filter_info) + add_filter_step(custom_key_filter_info, "Paralel replica custom key filter"); if (expressions.before_array_join) { diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 761eea8e1b8..5667c0e1fa7 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -224,6 +224,8 @@ private: /// For additional_filter setting. FilterDAGInfoPtr additional_filter_info; + FilterDAGInfoPtr custom_key_filter_info; + QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; /// List of columns to read to execute the query. diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 3d48c333727..d55e3f86f30 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -34,7 +34,6 @@ #include #include -#include #include #include #include @@ -45,8 +44,6 @@ #include #include -#include -#include namespace DB { @@ -64,7 +61,6 @@ namespace ErrorCodes extern const int DUPLICATED_PART_UUIDS; extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int PROJECTION_NOT_USED; - extern const int BAD_ARGUMENTS; } @@ -474,25 +470,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( return plan; } -namespace -{ - -bool supportsSamplingForParallelReplicas(const SelectQueryInfo & select_query_info, const MergeTreeData & data, const Settings & settings) -{ - if (settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) - { - /// maybe just don't use sampling or try to fallback to SAMPLE_KEY? - if (select_query_info.parallel_replica_custom_key_ast == nullptr) - throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but no 'parallel_replicas_custom_key' defined"); - - return true; - } - - return data.supportsSampling(); -} - -} - MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( const SelectQueryInfo & select_query_info, NamesAndTypesList available_real_columns, @@ -610,10 +587,9 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( * It is also important that the entire universe can be covered using SAMPLE 0.1 OFFSET 0, ... OFFSET 0.9 and similar decimals. */ - bool supports_sampling_for_parallel_replicas = supportsSamplingForParallelReplicas(select_query_info, data, settings); /// Parallel replicas has been requested but there is no way to sample data. /// Select all data from first replica and no data from other replicas. 
- if (settings.parallel_replicas_count > 1 && !supports_sampling_for_parallel_replicas && settings.parallel_replica_offset > 0) + if (settings.parallel_replicas_count > 1 && !data.supportsSampling() && settings.parallel_replica_offset > 0) { LOG_DEBUG(log, "Will use no data on this replica because parallel replicas processing has been requested" " (the setting 'max_parallel_replicas') but the table does not support sampling and this replica is not the first."); @@ -621,34 +597,16 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( return sampling; } - sampling.use_sampling = relative_sample_size > 0 || (settings.parallel_replicas_count > 1 && supports_sampling_for_parallel_replicas); + sampling.use_sampling = relative_sample_size > 0 || (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::SAMPLE_KEY && data.supportsSampling()); bool no_data = false; /// There is nothing left after sampling. - std::optional parallel_replicas_custom_key_description; - const auto get_sampling_key = [&]() -> const KeyDescription & - { - if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) - { - assert(select_query_info.parallel_replica_custom_key_ast); - - LOG_INFO(log, "Using custom key for sampling while processing with multiple replicas"); - - if (!parallel_replicas_custom_key_description) - parallel_replicas_custom_key_description = KeyDescription::getKeyFromAST(select_query_info.parallel_replica_custom_key_ast, metadata_snapshot->columns, context); - - return *parallel_replicas_custom_key_description; - } - - return metadata_snapshot->getSamplingKey(); - }; - if (sampling.use_sampling) { if (sample_factor_column_queried && relative_sample_size != RelativeSize(0)) sampling.used_sample_factor = 1.0 / boost::rational_cast(relative_sample_size); RelativeSize size_of_universum = 0; - const auto & sampling_key = get_sampling_key(); + const auto & sampling_key = metadata_snapshot->getSamplingKey(); DataTypePtr sampling_column_type = sampling_key.data_types[0]; if (sampling_key.data_types.size() == 1) @@ -723,7 +681,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// If sample and final are used together no need to calculate sampling expression twice. /// The first time it was calculated for final, because sample key is a part of the PK. /// So, assume that we already have calculated column. 
- ASTPtr sampling_key_ast = sampling_key.definition_ast; + ASTPtr sampling_key_ast = metadata_snapshot->getSamplingKeyAST(); if (final) { @@ -735,11 +693,8 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( if (has_lower_limit) { if (!key_condition.addCondition( - sampling_key.column_names[0], Range::createLeftBounded(lower, true, sampling_key.data_types[0]->isNullable())) - && (settings.parallel_replicas_count <= 1 || settings.parallel_replicas_mode == ParallelReplicasMode::SAMPLE_KEY)) - { + sampling_key.column_names[0], Range::createLeftBounded(lower, true, sampling_key.data_types[0]->isNullable()))) throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); - } ASTPtr args = std::make_shared(); args->children.push_back(sampling_key_ast); @@ -756,11 +711,8 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( if (has_upper_limit) { if (!key_condition.addCondition( - sampling_key.column_names[0], Range::createRightBounded(upper, false, sampling_key.data_types[0]->isNullable())) - && (settings.parallel_replicas_count <= 1 || settings.parallel_replicas_mode == ParallelReplicasMode::SAMPLE_KEY)) - { + sampling_key.column_names[0], Range::createRightBounded(upper, false, sampling_key.data_types[0]->isNullable()))) throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); - } ASTPtr args = std::make_shared(); args->children.push_back(sampling_key_ast); From f950b75ef0adb6543b2c1f24117d51b35482f1bd Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 11 Jan 2023 10:42:01 +0000 Subject: [PATCH 003/333] Fix --- src/Interpreters/InterpreterSelectQuery.cpp | 9 +++++---- src/Interpreters/InterpreterSelectQuery.h | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 1cd5d0004fa..8f03d83c1c9 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -112,6 +112,7 @@ namespace ErrorCodes extern const int INVALID_WITH_FILL_EXPRESSION; extern const int ACCESS_DENIED; extern const int UNKNOWN_IDENTIFIER; + extern const int BAD_ARGUMENTS; } /// Assumes `storage` is set and the table filter (row-level security) is not empty. @@ -682,11 +683,11 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (parallel_replicas_custom_filter_ast) { - custom_key_filter_info = generateFilterActions( + parallel_replicas_custom_filter_info = generateFilterActions( table_id, parallel_replicas_custom_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns, prepared_sets); - custom_key_filter_info->do_remove_column = true; + parallel_replicas_custom_filter_info->do_remove_column = true; query_info.filter_asts.push_back(parallel_replicas_custom_filter_ast); } @@ -1448,8 +1449,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

Date: Thu, 12 Jan 2023 07:56:15 +0000 Subject: [PATCH 004/333] Add READ_TASKS mode --- src/Core/SettingsEnums.cpp | 3 ++- src/Core/SettingsEnums.h | 1 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 13 +++++++++---- src/Storages/StorageDistributed.cpp | 3 ++- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index eaeed9dc44c..18abac0fe4a 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -165,5 +165,6 @@ IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(ParallelReplicasMode, ErrorCodes::BAD_ARGUMENTS, {{"sample_key", ParallelReplicasMode::SAMPLE_KEY}, - {"custom_key", ParallelReplicasMode::CUSTOM_KEY}}) + {"custom_key", ParallelReplicasMode::CUSTOM_KEY}, + {"read_tasks", ParallelReplicasMode::READ_TASKS}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index a2041634d4b..4acf2889e37 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -196,6 +196,7 @@ enum class ParallelReplicasMode : uint8_t { SAMPLE_KEY, CUSTOM_KEY, + READ_TASKS, }; DECLARE_SETTING_ENUM(ParallelReplicasMode) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index d55e3f86f30..3a4e509163b 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -589,16 +589,21 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// Parallel replicas has been requested but there is no way to sample data. /// Select all data from first replica and no data from other replicas. - if (settings.parallel_replicas_count > 1 && !data.supportsSampling() && settings.parallel_replica_offset > 0) + if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::SAMPLE_KEY + && !data.supportsSampling() && settings.parallel_replica_offset > 0) { - LOG_DEBUG(log, "Will use no data on this replica because parallel replicas processing has been requested" + LOG_DEBUG( + log, + "Will use no data on this replica because parallel replicas processing has been requested" " (the setting 'max_parallel_replicas') but the table does not support sampling and this replica is not the first."); sampling.read_nothing = true; return sampling; } - sampling.use_sampling = relative_sample_size > 0 || (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::SAMPLE_KEY && data.supportsSampling()); - bool no_data = false; /// There is nothing left after sampling. + sampling.use_sampling = relative_sample_size > 0 + || (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::SAMPLE_KEY + && data.supportsSampling()); + bool no_data = false; /// There is nothing left after sampling. 
if (sampling.use_sampling) { diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 6473e00cdf2..04b98fa8d20 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -697,7 +697,8 @@ void StorageDistributed::read( auto settings = local_context->getSettingsRef(); - bool parallel_replicas = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas && !settings.use_hedged_requests; + bool parallel_replicas = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas + && !settings.use_hedged_requests && settings.parallel_replicas_mode == ParallelReplicasMode::READ_TASKS; if (parallel_replicas) ClusterProxy::executeQueryWithParallelReplicas( From 5589bec33beeced38d3088c8e70800ca5c71a9eb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 13 Jan 2023 11:35:37 +0000 Subject: [PATCH 005/333] Update tests --- ...4_shard_distributed_with_many_replicas.sql | 1 - ...here_max_parallel_replicas_distributed.sql | 2 -- .../01034_sample_final_distributed.sql | 2 -- ...099_parallel_distributed_insert_select.sql | 1 - .../01517_select_final_distributed.sql | 2 -- .../01557_max_parallel_replicas_no_sample.sql | 2 -- .../02221_parallel_replicas_bug.sh | 2 +- ...arallel_reading_from_replicas_benchmark.sh | 1 + .../02404_memory_bound_merging.sql | 1 + ...max_parallel_replicas_custom_key.reference | 0 ...02527_max_parallel_replicas_custom_key.sql | 36 +++++++++++++++++++ 11 files changed, 39 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.reference create mode 100644 tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.sql diff --git a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql index 901b818cbc0..ab4c433ba47 100644 --- a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql +++ b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql @@ -1,6 +1,5 @@ -- Tags: replica, distributed -SET allow_experimental_parallel_reading_from_replicas = 0; SET max_parallel_replicas = 2; DROP TABLE IF EXISTS report; diff --git a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql index 86c84427297..b26c9af14a9 100644 --- a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql +++ b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql @@ -1,7 +1,5 @@ -- Tags: replica, distributed -set allow_experimental_parallel_reading_from_replicas=0; - drop table if exists test_max_parallel_replicas_lr; -- If you wonder why the table is named with "_lr" suffix in this test. 
diff --git a/tests/queries/0_stateless/01034_sample_final_distributed.sql b/tests/queries/0_stateless/01034_sample_final_distributed.sql index a81fef645db..b784b35cbb3 100644 --- a/tests/queries/0_stateless/01034_sample_final_distributed.sql +++ b/tests/queries/0_stateless/01034_sample_final_distributed.sql @@ -1,7 +1,5 @@ -- Tags: distributed -set allow_experimental_parallel_reading_from_replicas = 0; - drop table if exists sample_final; create table sample_final (CounterID UInt32, EventDate Date, EventTime DateTime, UserID UInt64, Sign Int8) engine = CollapsingMergeTree(Sign) order by (CounterID, EventDate, intHash32(UserID), EventTime) sample by intHash32(UserID); insert into sample_final select number / (8192 * 4), toDate('2019-01-01'), toDateTime('2019-01-01 00:00:01') + number, number / (8192 * 2), number % 3 = 1 ? -1 : 1 from numbers(1000000); diff --git a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql index aa924218360..a75a5b2c33d 100644 --- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql +++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql @@ -2,7 +2,6 @@ -- set insert_distributed_sync = 1; -- see https://github.com/ClickHouse/ClickHouse/issues/18971 -SET allow_experimental_parallel_reading_from_replicas = 0; -- see https://github.com/ClickHouse/ClickHouse/issues/34525 SET prefer_localhost_replica = 1; DROP TABLE IF EXISTS local_01099_a; diff --git a/tests/queries/0_stateless/01517_select_final_distributed.sql b/tests/queries/0_stateless/01517_select_final_distributed.sql index 701828b0b38..a3d1fcfc185 100644 --- a/tests/queries/0_stateless/01517_select_final_distributed.sql +++ b/tests/queries/0_stateless/01517_select_final_distributed.sql @@ -1,7 +1,5 @@ -- Tags: distributed -SET allow_experimental_parallel_reading_from_replicas = 0; - DROP TABLE IF EXISTS test5346; CREATE TABLE test5346 (`Id` String, `Timestamp` DateTime, `updated` DateTime) diff --git a/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql b/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql index 04777f5b31c..2b1a66147a4 100644 --- a/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql +++ b/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql @@ -1,7 +1,5 @@ -- Tags: replica -SET allow_experimental_parallel_reading_from_replicas=0; - DROP TABLE IF EXISTS t; CREATE TABLE t (x String) ENGINE = MergeTree ORDER BY x; INSERT INTO t VALUES ('Hello'); diff --git a/tests/queries/0_stateless/02221_parallel_replicas_bug.sh b/tests/queries/0_stateless/02221_parallel_replicas_bug.sh index cce32bf8272..1a703b20d2a 100755 --- a/tests/queries/0_stateless/02221_parallel_replicas_bug.sh +++ b/tests/queries/0_stateless/02221_parallel_replicas_bug.sh @@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --allow_experimental_parallel_reading_from_replicas=1 -nm < "$CURDIR"/01099_parallel_distributed_insert_select.sql > /dev/null +${CLICKHOUSE_CLIENT} --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_mode='read_tasks' -nm < "$CURDIR"/01099_parallel_distributed_insert_select.sql > /dev/null diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh index 2a163746e20..283c51cf0a2 100755 --- a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh +++ b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh @@ -15,6 +15,7 @@ as select * from numbers(1); # Logical error: 'Coordinator for parallel reading from replicas is not initialized'. opts=( --allow_experimental_parallel_reading_from_replicas 1 + --parallel_replicas_mode 'read_tasks' --max_parallel_replicas 3 --iterations 1 diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql index c41e2d3abae..fb0c65e6a7c 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.sql +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -49,6 +49,7 @@ select a, count() from dist_t_different_dbs group by a, b order by a limit 5 off -- { echoOff } -- set allow_experimental_parallel_reading_from_replicas = 1; +set parallel_replicas_mode = 'read_tasks'; set max_parallel_replicas = 3; set use_hedged_requests = 0; diff --git a/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.sql b/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.sql new file mode 100644 index 00000000000..b716600b9d5 --- /dev/null +++ b/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.sql @@ -0,0 +1,36 @@ +-- Tags: replica + +DROP TABLE IF EXISTS t; + +CREATE TABLE t (x String) ENGINE = MergeTree ORDER BY x; +INSERT INTO t VALUES ('Hello'); + +SET max_parallel_replicas = 3; +SET parallel_replicas_mode = 'custom_key'; + +SELECT * FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'sipHash64(x)'; + +DROP TABLE t; + +CREATE TABLE t (x String, y Int32) ENGINE = MergeTree ORDER BY cityHash64(x) SAMPLE BY cityHash64(x); +INSERT INTO t SELECT toString(number), number FROM numbers(1000); + +SET max_parallel_replicas = 1; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x)'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + 1'; + +SET max_parallel_replicas = 2; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key 
= 'cityHash64(x)'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + 1'; + +SET max_parallel_replicas = 3; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x)'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + 1'; + +DROP TABLE t; From e60c2fa77f6c00fd69ea25e7117bf0f8eeba36bf Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 13 Jan 2023 12:47:38 +0000 Subject: [PATCH 006/333] Update reference --- ...02527_max_parallel_replicas_custom_key.reference | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.reference index e69de29bb2d..c51a7e10fb2 100644 --- a/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.reference +++ b/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.reference @@ -0,0 +1,13 @@ +Hello +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 From 57e771226cb3962941be802e2b272a753024c567 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 16 Jan 2023 10:11:21 +0000 Subject: [PATCH 007/333] Add range mode for custom key --- src/Core/Settings.h | 3 +- src/Core/SettingsEnums.cpp | 4 + src/Core/SettingsEnums.h | 8 + src/Interpreters/InterpreterSelectQuery.cpp | 186 ++++++++++++++---- ...rallel_replicas_custom_key_range.reference | 13 ++ ...max_parallel_replicas_custom_key_range.sql | 37 ++++ 6 files changed, 214 insertions(+), 37 deletions(-) create mode 100644 tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.reference create mode 100644 tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 573d742ff58..61d1c62bcff 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -153,7 +153,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \ M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. 
This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ - M(String, parallel_replicas_custom_key, "", "Custom key for parallel replicas", 0) \ + M(String, parallel_replicas_custom_key, "", "Custom key for parallel replicas using modulo operation on the key for assigning work to replicas.", 0) \ + M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ M(ParallelReplicasMode, parallel_replicas_mode, ParallelReplicasMode::SAMPLE_KEY, "How to process query using multiple replicas.", 0) \ \ M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind on MergeTree table.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 33aba922e95..69fd78d7eb3 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -168,6 +168,10 @@ IMPLEMENT_SETTING_ENUM(ParallelReplicasMode, ErrorCodes::BAD_ARGUMENTS, {"custom_key", ParallelReplicasMode::CUSTOM_KEY}, {"read_tasks", ParallelReplicasMode::READ_TASKS}}) +IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS, + {{"default", ParallelReplicasCustomKeyFilterType::DEFAULT}, + {"range", ParallelReplicasCustomKeyFilterType::RANGE}}) + IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS, {{"mmap", LocalFSReadMethod::mmap}, {"pread", LocalFSReadMethod::pread}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 83c12477165..52c1b369f4b 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -202,5 +202,13 @@ enum class ParallelReplicasMode : uint8_t DECLARE_SETTING_ENUM(ParallelReplicasMode) +enum class ParallelReplicasCustomKeyFilterType : uint8_t +{ + DEFAULT, + RANGE, +}; + +DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType) + DECLARE_SETTING_ENUM(LocalFSReadMethod) } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 28f63d5967c..6b66c75e54f 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -94,6 +94,9 @@ #include #include #include +#include "Core/SettingsEnums.h" + +#include namespace DB { @@ -113,6 +116,7 @@ namespace ErrorCodes extern const int ACCESS_DENIED; extern const int UNKNOWN_IDENTIFIER; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } /// Assumes `storage` is set and the table filter (row-level security) is not empty. @@ -228,10 +232,13 @@ InterpreterSelectQuery::InterpreterSelectQuery( InterpreterSelectQuery::~InterpreterSelectQuery() = default; +namespace +{ + /** There are no limits on the maximum size of the result for the subquery. * Since the result of the query is not the result of the entire query. 
*/ -static ContextPtr getSubqueryContext(const ContextPtr & context) +ContextPtr getSubqueryContext(const ContextPtr & context) { auto subquery_context = Context::createCopy(context); Settings subquery_settings = context->getSettings(); @@ -243,7 +250,7 @@ static ContextPtr getSubqueryContext(const ContextPtr & context) return subquery_context; } -static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database, const Settings & settings) +void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database, const Settings & settings) { ASTSelectQuery & select = query->as(); @@ -263,7 +270,7 @@ static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & table } /// Checks that the current user has the SELECT privilege. -static void checkAccessRightsForSelect( +void checkAccessRightsForSelect( const ContextPtr & context, const StorageID & table_id, const StorageMetadataPtr & table_metadata, @@ -293,7 +300,7 @@ static void checkAccessRightsForSelect( context->checkAccess(AccessType::SELECT, table_id, syntax_analyzer_result.requiredSourceColumnsForAccessCheck()); } -static ASTPtr parseAdditionalFilterConditionForTable( +ASTPtr parseAdditionalFilterConditionForTable( const Map & setting, const DatabaseAndTableWithAlias & target, const Context & context) @@ -320,7 +327,7 @@ static ASTPtr parseAdditionalFilterConditionForTable( return nullptr; } -static ASTPtr parseParallelReplicaCustomKey(const String & setting, const Context & context) +ASTPtr parseParallelReplicaCustomKey(const String & setting, const Context & context) { ParserExpression parser; const auto & settings = context.getSettingsRef(); @@ -329,8 +336,142 @@ static ASTPtr parseParallelReplicaCustomKey(const String & setting, const Contex "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); } +ASTPtr getCustomKeyFilterForParallelReplica(const Settings & settings, const StoragePtr & storage, const ContextPtr & context) +{ + assert(settings.parallel_replicas_count > 1); + + if (settings.parallel_replicas_custom_key.value.empty()) + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but 'parallel_replicas_custom_key' has no value"); + + auto custom_key_ast = parseParallelReplicaCustomKey( + settings.parallel_replicas_custom_key, *context); + + if (settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) + { + // first we do modulo with replica count + ASTPtr args = std::make_shared(); + args->children.push_back(custom_key_ast); + args->children.push_back(std::make_shared(settings.parallel_replicas_count.value)); + + auto modulo_function = std::make_shared(); + modulo_function->name = "positiveModulo"; + modulo_function->arguments = args; + modulo_function->children.push_back(modulo_function->arguments); + + /// then we compare result to the current replica number (offset) + args = std::make_shared(); + args->children.push_back(modulo_function); + args->children.push_back(std::make_shared(settings.parallel_replica_offset.value)); + + auto equals_function = std::make_shared(); + equals_function->name = "equals"; + equals_function->arguments = args; + equals_function->children.push_back(equals_function->arguments); + + return equals_function; + } + + // create range query + assert(settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::RANGE); + + if (!storage) + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, 
"Storage is unknown when trying to parse custom key for parallel replica"); + + KeyDescription custom_key_description = KeyDescription::getKeyFromAST(custom_key_ast, storage->getInMemoryMetadataPtr()->columns, context); + + using RelativeSize = boost::rational; + + RelativeSize size_of_universum = 0; + DataTypePtr custom_key_column_type = custom_key_description.data_types[0]; + + if (custom_key_description.data_types.size() == 1) + { + if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + } + + if (size_of_universum == RelativeSize(0)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Invalid custom key column type: {}. Must be one unsigned integer type", custom_key_column_type->getName()); + + RelativeSize relative_range_size = RelativeSize(1) / settings.parallel_replicas_count.value; + RelativeSize relative_range_offset = relative_range_size * RelativeSize(settings.parallel_replica_offset.value); + + /// Calculate the half-interval of `[lower, upper)` column values. + bool has_lower_limit = false; + bool has_upper_limit = false; + + RelativeSize lower_limit_rational = relative_range_offset * size_of_universum; + RelativeSize upper_limit_rational = (relative_range_offset + relative_range_size) * size_of_universum; + + UInt64 lower = boost::rational_cast(lower_limit_rational); + UInt64 upper = boost::rational_cast(upper_limit_rational); + + if (lower > 0) + has_lower_limit = true; + + if (upper_limit_rational < size_of_universum) + has_upper_limit = true; + + assert(has_lower_limit || has_upper_limit); + + /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. + std::shared_ptr lower_function; + std::shared_ptr upper_function; + + if (has_lower_limit) + { + ASTPtr args = std::make_shared(); + args->children.push_back(custom_key_ast); + args->children.push_back(std::make_shared(lower)); + + lower_function = std::make_shared(); + lower_function->name = "greaterOrEquals"; + lower_function->arguments = args; + lower_function->children.push_back(lower_function->arguments); + + if (!has_upper_limit) + return lower_function; + } + + if (has_upper_limit) + { + ASTPtr args = std::make_shared(); + args->children.push_back(custom_key_ast); + args->children.push_back(std::make_shared(upper)); + + upper_function = std::make_shared(); + upper_function->name = "less"; + upper_function->arguments = args; + upper_function->children.push_back(upper_function->arguments); + + if (!has_lower_limit) + return upper_function; + } + + assert(has_lower_limit && has_upper_limit); + + ASTPtr args = std::make_shared(); + args->children.push_back(lower_function); + args->children.push_back(upper_function); + + auto filter_function = std::make_shared(); + filter_function->name = "and"; + filter_function->arguments = args; + filter_function->children.push_back(filter_function->arguments); + + return filter_function; +} + /// Returns true if we should ignore quotas and limits for a specified table in the system database. 
-static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) +bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) { if (table_id.database_name == DatabaseCatalog::SYSTEM_DATABASE) { @@ -341,6 +482,8 @@ static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) return false; } +} + InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const ContextPtr & context_, @@ -511,38 +654,9 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.additional_filter_ast = parseAdditionalFilterConditionForTable( settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); - ASTPtr parallel_replicas_custom_filter_ast = nullptr; if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) - { - if (settings.parallel_replicas_custom_key.value.empty()) - throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but 'parallel_replicas_custom_key' has no value"); - - auto custom_key_ast = parseParallelReplicaCustomKey( - settings.parallel_replicas_custom_key, *context); - - // first we do modulo with replica count - ASTPtr args = std::make_shared(); - args->children.push_back(custom_key_ast); - args->children.push_back(std::make_shared(settings.parallel_replicas_count.value)); - - auto modulo_function = std::make_shared(); - modulo_function->name = "positiveModulo"; - modulo_function->arguments = args; - modulo_function->children.push_back(modulo_function->arguments); - - /// then we compare result to the current replica number (offset) - args = std::make_shared(); - args->children.push_back(modulo_function); - args->children.push_back(std::make_shared(settings.parallel_replica_offset.value)); - - auto equals_function = std::make_shared(); - equals_function->name = "equals"; - equals_function->arguments = args; - equals_function->children.push_back(equals_function->arguments); - - parallel_replicas_custom_filter_ast = equals_function; - } + parallel_replicas_custom_filter_ast = getCustomKeyFilterForParallelReplica(settings, storage, context); auto analyze = [&] (bool try_move_to_prewhere) { diff --git a/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.reference b/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.reference new file mode 100644 index 00000000000..c51a7e10fb2 --- /dev/null +++ b/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.reference @@ -0,0 +1,13 @@ +Hello +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 diff --git a/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql b/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql new file mode 100644 index 00000000000..0c1cb7fb48b --- /dev/null +++ b/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql @@ -0,0 +1,37 @@ +-- Tags: replica + +DROP TABLE IF EXISTS t; + +CREATE TABLE t (x String) ENGINE = MergeTree ORDER BY x; +INSERT INTO t VALUES ('Hello'); + +SET max_parallel_replicas = 3; +SET parallel_replicas_mode = 'custom_key'; +SET parallel_replicas_custom_key_filter_type = 'range'; + +SELECT * FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'sipHash64(x)'; + +DROP TABLE t; + +CREATE TABLE t (x String, y Int32) ENGINE = MergeTree ORDER BY cityHash64(x) SAMPLE BY cityHash64(x); +INSERT INTO t SELECT toString(number), number FROM numbers(1000); + +SET max_parallel_replicas = 1; +SELECT 
count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x)'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + 1'; + +SET max_parallel_replicas = 2; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x)'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + 1'; + +SET max_parallel_replicas = 3; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x)'; +SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + 1'; + +DROP TABLE t; From 5adf44775830d92e1bfba53ebfba5c71a956d48e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 16 Jan 2023 11:22:12 +0000 Subject: [PATCH 008/333] Use unsigned integer for column --- .../02528_max_parallel_replicas_custom_key_range.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql b/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql index 0c1cb7fb48b..a8ef9e58d40 100644 --- a/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql +++ b/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql @@ -13,7 +13,7 @@ SELECT * FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_ DROP TABLE t; -CREATE TABLE t (x String, y Int32) ENGINE = MergeTree ORDER BY cityHash64(x) SAMPLE BY cityHash64(x); +CREATE TABLE t (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x) SAMPLE BY cityHash64(x); INSERT INTO t SELECT toString(number), number FROM numbers(1000); SET max_parallel_replicas = 1; From bd352068d749d428d909c0b5d81f299a7e85e3f1 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 17 Jan 2023 12:34:42 +0000 Subject: [PATCH 009/333] Turn replicas into shard for custom_key --- .../ClusterProxy/executeQuery.cpp | 8 +- src/Interpreters/ClusterProxy/executeQuery.h | 3 +- src/Interpreters/InterpreterSelectQuery.cpp | 157 ----------------- src/Storages/StorageDistributed.cpp | 162 +++++++++++++++++- 4 files changed, 169 insertions(+), 161 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 8ea6298c50b..fc490306e08 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -127,7 +127,8 @@ void executeQuery( const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info, const ExpressionActionsPtr & sharding_key_expr, 
const std::string & sharding_key_column_name, - const ClusterPtr & not_optimized_cluster) + const ClusterPtr & not_optimized_cluster, + std::function add_additional_shard_filter = {}) { const Settings & settings = context->getSettingsRef(); @@ -177,7 +178,10 @@ void executeQuery( visitor.visit(query_ast_for_shard); } else - query_ast_for_shard = query_ast; + query_ast_for_shard = query_ast->clone(); + + if (add_additional_shard_filter) + add_additional_shard_filter(query_ast_for_shard, shard_info.shard_num); stream_factory.createForShard(shard_info, query_ast_for_shard, main_table, table_func_ptr, diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 662fe47ca65..9519fa00efe 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -50,7 +50,8 @@ void executeQuery( const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info, const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, - const ClusterPtr & not_optimized_cluster); + const ClusterPtr & not_optimized_cluster, + std::function add_additional_shard_filter); void executeQueryWithParallelReplicas( diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6b66c75e54f..3afe83ffac3 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -327,149 +327,6 @@ ASTPtr parseAdditionalFilterConditionForTable( return nullptr; } -ASTPtr parseParallelReplicaCustomKey(const String & setting, const Context & context) -{ - ParserExpression parser; - const auto & settings = context.getSettingsRef(); - return parseQuery( - parser, setting.data(), setting.data() + setting.size(), - "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); -} - -ASTPtr getCustomKeyFilterForParallelReplica(const Settings & settings, const StoragePtr & storage, const ContextPtr & context) -{ - assert(settings.parallel_replicas_count > 1); - - if (settings.parallel_replicas_custom_key.value.empty()) - throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but 'parallel_replicas_custom_key' has no value"); - - auto custom_key_ast = parseParallelReplicaCustomKey( - settings.parallel_replicas_custom_key, *context); - - if (settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) - { - // first we do modulo with replica count - ASTPtr args = std::make_shared(); - args->children.push_back(custom_key_ast); - args->children.push_back(std::make_shared(settings.parallel_replicas_count.value)); - - auto modulo_function = std::make_shared(); - modulo_function->name = "positiveModulo"; - modulo_function->arguments = args; - modulo_function->children.push_back(modulo_function->arguments); - - /// then we compare result to the current replica number (offset) - args = std::make_shared(); - args->children.push_back(modulo_function); - args->children.push_back(std::make_shared(settings.parallel_replica_offset.value)); - - auto equals_function = std::make_shared(); - equals_function->name = "equals"; - equals_function->arguments = args; - equals_function->children.push_back(equals_function->arguments); - - return equals_function; - } - - // create range query - assert(settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::RANGE); - - if (!storage) - throw 
DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Storage is unknown when trying to parse custom key for parallel replica"); - - KeyDescription custom_key_description = KeyDescription::getKeyFromAST(custom_key_ast, storage->getInMemoryMetadataPtr()->columns, context); - - using RelativeSize = boost::rational; - - RelativeSize size_of_universum = 0; - DataTypePtr custom_key_column_type = custom_key_description.data_types[0]; - - if (custom_key_description.data_types.size() == 1) - { - if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - } - - if (size_of_universum == RelativeSize(0)) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, - "Invalid custom key column type: {}. Must be one unsigned integer type", custom_key_column_type->getName()); - - RelativeSize relative_range_size = RelativeSize(1) / settings.parallel_replicas_count.value; - RelativeSize relative_range_offset = relative_range_size * RelativeSize(settings.parallel_replica_offset.value); - - /// Calculate the half-interval of `[lower, upper)` column values. - bool has_lower_limit = false; - bool has_upper_limit = false; - - RelativeSize lower_limit_rational = relative_range_offset * size_of_universum; - RelativeSize upper_limit_rational = (relative_range_offset + relative_range_size) * size_of_universum; - - UInt64 lower = boost::rational_cast(lower_limit_rational); - UInt64 upper = boost::rational_cast(upper_limit_rational); - - if (lower > 0) - has_lower_limit = true; - - if (upper_limit_rational < size_of_universum) - has_upper_limit = true; - - assert(has_lower_limit || has_upper_limit); - - /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. 
- std::shared_ptr lower_function; - std::shared_ptr upper_function; - - if (has_lower_limit) - { - ASTPtr args = std::make_shared(); - args->children.push_back(custom_key_ast); - args->children.push_back(std::make_shared(lower)); - - lower_function = std::make_shared(); - lower_function->name = "greaterOrEquals"; - lower_function->arguments = args; - lower_function->children.push_back(lower_function->arguments); - - if (!has_upper_limit) - return lower_function; - } - - if (has_upper_limit) - { - ASTPtr args = std::make_shared(); - args->children.push_back(custom_key_ast); - args->children.push_back(std::make_shared(upper)); - - upper_function = std::make_shared(); - upper_function->name = "less"; - upper_function->arguments = args; - upper_function->children.push_back(upper_function->arguments); - - if (!has_lower_limit) - return upper_function; - } - - assert(has_lower_limit && has_upper_limit); - - ASTPtr args = std::make_shared(); - args->children.push_back(lower_function); - args->children.push_back(upper_function); - - auto filter_function = std::make_shared(); - filter_function->name = "and"; - filter_function->arguments = args; - filter_function->children.push_back(filter_function->arguments); - - return filter_function; -} - /// Returns true if we should ignore quotas and limits for a specified table in the system database. bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) { @@ -654,10 +511,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.additional_filter_ast = parseAdditionalFilterConditionForTable( settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); - ASTPtr parallel_replicas_custom_filter_ast = nullptr; - if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) - parallel_replicas_custom_filter_ast = getCustomKeyFilterForParallelReplica(settings, storage, context); - auto analyze = [&] (bool try_move_to_prewhere) { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. 
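
(Illustrative sketch, not part of the patch: the custom-key filter logic being moved in this patch builds, as an AST, one of two predicate shapes per replica. Below is a minimal standalone model of that arithmetic, assuming a UInt32 custom-key column, i.e. a value universe of [0, 2^32); the real code produces positiveModulo/greaterOrEquals/less function ASTs instead of evaluating integers.)

#include <cstdint>
#include <iostream>

// DEFAULT mode: replica `offset` keeps rows where positiveModulo(key, count) == offset.
bool default_mode(uint64_t key, uint64_t count, uint64_t offset)
{
    return key % count == offset;
}

// RANGE mode: replica `offset` keeps the half-interval
// [floor(offset * universe / count), floor((offset + 1) * universe / count))
// of key values; the last replica effectively has no upper bound inside the universe.
bool range_mode(uint64_t key, uint64_t count, uint64_t offset)
{
    const uint64_t universe = uint64_t{1} << 32;
    const uint64_t lower = offset * universe / count;
    const uint64_t upper = (offset + 1) * universe / count;
    return key >= lower && key < upper;
}

int main()
{
    // Every key value is accepted by exactly one of the three replicas in either mode.
    for (uint64_t key : {0ULL, 12345ULL, 4294967295ULL})
        for (uint64_t replica = 0; replica < 3; ++replica)
            std::cout << "key " << key << ", replica " << replica
                      << ": default=" << default_mode(key, 3, replica)
                      << ", range=" << range_mode(key, 3, replica) << '\n';
}
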
@@ -823,16 +676,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.filter_asts.push_back(query_info.additional_filter_ast); } - if (parallel_replicas_custom_filter_ast) - { - parallel_replicas_custom_filter_info = generateFilterActions( - table_id, parallel_replicas_custom_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns, - prepared_sets); - - parallel_replicas_custom_filter_info->do_remove_column = true; - query_info.filter_asts.push_back(parallel_replicas_custom_filter_ast); - } - source_header = storage_snapshot->getSampleBlockForColumns(required_columns, parameter_values); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 671ec91609e..56e5bfe9189 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -27,6 +27,7 @@ #include #include #include +#include "Core/SettingsEnums.h" #include #include @@ -88,6 +89,8 @@ #include #include +#include + namespace fs = std::filesystem; @@ -440,6 +443,11 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( const auto & settings = local_context->getSettingsRef(); ClusterPtr cluster = getCluster(); + + // if it's custom_key we will turn replicas into shards and filter specific data on each of them + if (settings.max_parallel_replicas > 1 && cluster->getShardCount() == 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) + cluster = cluster->getClusterWithReplicasAsShards(settings); + query_info.cluster = cluster; size_t nodes = getClusterQueriedNodes(settings, cluster); @@ -749,6 +757,158 @@ void StorageDistributed::read( bool parallel_replicas = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas && !settings.use_hedged_requests && settings.parallel_replicas_mode == ParallelReplicasMode::READ_TASKS; + ParserExpression parser; + auto custom_key_ast = parseQuery( + parser, + settings.parallel_replicas_custom_key.value.data(), + settings.parallel_replicas_custom_key.value.data() + settings.parallel_replicas_custom_key.value.size(), + "parallel replicas custom key", + settings.max_query_size, + settings.max_parser_depth); + + auto shard_count = query_info.getCluster()->getShardCount(); + + std::function add_additional_shard_filter; + if (settings.max_parallel_replicas > 1 + && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) + { + add_additional_shard_filter = [&](ASTPtr & query, uint64_t shard_num) + { + ParserExpression parser; + auto custom_key_ast = parseQuery( + parser, settings.parallel_replicas_custom_key.value.data(), settings.parallel_replicas_custom_key.value.data() + settings.parallel_replicas_custom_key.value.size(), + "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); + + if (settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) + { + // first we do modulo with replica count + ASTPtr args = std::make_shared(); + args->children.push_back(custom_key_ast); + args->children.push_back(std::make_shared(shard_count)); + + auto modulo_function = std::make_shared(); + modulo_function->name = "positiveModulo"; + modulo_function->arguments = args; + modulo_function->children.push_back(modulo_function->arguments); + + /// then we compare result to the current replica number (offset) + args = std::make_shared(); + args->children.push_back(modulo_function); + args->children.push_back(std::make_shared(shard_num - 1)); + + auto equals_function = 
std::make_shared(); + equals_function->name = "equals"; + equals_function->arguments = args; + equals_function->children.push_back(equals_function->arguments); + + auto & select_query = query->as(); + select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(equals_function)); + } + else + { + assert(settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::RANGE); + auto filter_function = [&] + { + KeyDescription custom_key_description = KeyDescription::getKeyFromAST(custom_key_ast, getInMemoryMetadataPtr()->columns, local_context); + + using RelativeSize = boost::rational; + + RelativeSize size_of_universum = 0; + DataTypePtr custom_key_column_type = custom_key_description.data_types[0]; + + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + if (custom_key_description.data_types.size() == 1) + { + if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + } + + if (size_of_universum == RelativeSize(0)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Invalid custom key column type: {}. Must be one unsigned integer type", custom_key_column_type->getName()); + + RelativeSize relative_range_size = RelativeSize(1) / query_info.getCluster()->getShardCount(); + RelativeSize relative_range_offset = relative_range_size * RelativeSize(shard_num - 1); + + /// Calculate the half-interval of `[lower, upper)` column values. + bool has_lower_limit = false; + bool has_upper_limit = false; + + RelativeSize lower_limit_rational = relative_range_offset * size_of_universum; + RelativeSize upper_limit_rational = (relative_range_offset + relative_range_size) * size_of_universum; + + UInt64 lower = boost::rational_cast(lower_limit_rational); + UInt64 upper = boost::rational_cast(upper_limit_rational); + + if (lower > 0) + has_lower_limit = true; + + if (upper_limit_rational < size_of_universum) + has_upper_limit = true; + + assert(has_lower_limit || has_upper_limit); + + /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. 
+ std::shared_ptr lower_function; + std::shared_ptr upper_function; + + if (has_lower_limit) + { + ASTPtr args = std::make_shared(); + args->children.push_back(custom_key_ast); + args->children.push_back(std::make_shared(lower)); + + lower_function = std::make_shared(); + lower_function->name = "greaterOrEquals"; + lower_function->arguments = args; + lower_function->children.push_back(lower_function->arguments); + + if (!has_upper_limit) + return lower_function; + } + + if (has_upper_limit) + { + ASTPtr args = std::make_shared(); + args->children.push_back(custom_key_ast); + args->children.push_back(std::make_shared(upper)); + + upper_function = std::make_shared(); + upper_function->name = "less"; + upper_function->arguments = args; + upper_function->children.push_back(upper_function->arguments); + + if (!has_lower_limit) + return upper_function; + } + + assert(has_lower_limit && has_upper_limit); + + ASTPtr args = std::make_shared(); + args->children.push_back(lower_function); + args->children.push_back(upper_function); + + auto f = std::make_shared(); + f->name = "and"; + f->arguments = args; + f->children.push_back(f->arguments); + + return f; + }; + + auto & select_query = query->as(); + select_query.setExpression(ASTSelectQuery::Expression::WHERE, filter_function()); + } + }; + } + if (parallel_replicas) ClusterProxy::executeQueryWithParallelReplicas( query_plan, main_table, remote_table_function_ptr, @@ -763,7 +923,7 @@ void StorageDistributed::read( select_stream_factory, log, modified_query_ast, local_context, query_info, sharding_key_expr, sharding_key_column_name, - query_info.cluster); + query_info.cluster, add_additional_shard_filter); /// This is a bug, it is possible only when there is no shards to query, and this is handled earlier. if (!query_plan.isInitialized()) From dd31de18a3dc1f05a90f646216830d03f91fed03 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 17 Jan 2023 13:31:09 +0000 Subject: [PATCH 010/333] Extend where correctly --- src/Interpreters/InterpreterSelectQuery.cpp | 2 -- src/Storages/StorageDistributed.cpp | 38 ++++++++++++++++----- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 3afe83ffac3..6969a068949 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -115,8 +115,6 @@ namespace ErrorCodes extern const int INVALID_WITH_FILL_EXPRESSION; extern const int ACCESS_DENIED; extern const int UNKNOWN_IDENTIFIER; - extern const int BAD_ARGUMENTS; - extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } /// Assumes `storage` is set and the table filter (row-level security) is not empty. 
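
(Illustrative sketch, not part of the patch: the point of "Extend where correctly" in the StorageDistributed.cpp hunk below is that the generated per-replica filter must be AND-ed with any WHERE clause the query already carries, not overwrite it. A string-level model of that rewrite follows; the real code edits ASTs via ASTSelectQuery::setExpression, and the column names here are invented.)

#include <iostream>
#include <string>

std::string extendWhere(const std::string & existing_where, const std::string & replica_filter)
{
    if (existing_where.empty())
        return replica_filter;                                      // no WHERE yet: install the filter directly
    return "(" + existing_where + ") AND (" + replica_filter + ")"; // otherwise keep the user's condition
}

int main()
{
    // e.g. replica #1 of 3 with parallel_replicas_custom_key = 'cityHash64(x)', DEFAULT filter type
    const std::string filter = "positiveModulo(cityHash64(x), 3) = 1";
    std::cout << "WHERE " << extendWhere("y > 10", filter) << '\n';
    std::cout << "WHERE " << extendWhere("", filter) << '\n';
}
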
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 56e5bfe9189..0309aeabbc7 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -133,6 +133,7 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int UNSUPPORTED_METHOD; + extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } namespace ActionLocks @@ -779,6 +780,7 @@ void StorageDistributed::read( parser, settings.parallel_replicas_custom_key.value.data(), settings.parallel_replicas_custom_key.value.data() + settings.parallel_replicas_custom_key.value.size(), "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); + ASTPtr shard_filter = nullptr ; if (settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) { // first we do modulo with replica count @@ -801,8 +803,7 @@ void StorageDistributed::read( equals_function->arguments = args; equals_function->children.push_back(equals_function->arguments); - auto & select_query = query->as(); - select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(equals_function)); + shard_filter = equals_function; } else { @@ -895,16 +896,37 @@ void StorageDistributed::read( args->children.push_back(lower_function); args->children.push_back(upper_function); - auto f = std::make_shared(); - f->name = "and"; - f->arguments = args; - f->children.push_back(f->arguments); + auto and_function = std::make_shared(); + and_function->name = "and"; + and_function->arguments = args; + and_function->children.push_back(and_function->arguments); - return f; + return and_function; }; + shard_filter = filter_function(); + } + + if (shard_filter) + { auto & select_query = query->as(); - select_query.setExpression(ASTSelectQuery::Expression::WHERE, filter_function()); + + auto where_expression = select_query.where(); + if (where_expression) + { + ASTPtr args = std::make_shared(); + args->children.push_back(where_expression); + args->children.push_back(shard_filter); + + auto and_function = std::make_shared(); + and_function->name = "and"; + and_function->arguments = args; + and_function->children.push_back(and_function->arguments); + + shard_filter = std::move(and_function); + } + + select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(shard_filter)); } }; } From 2a1f35e6612fb07bd12090b9f3563753a0fe7183 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 19 Jan 2023 07:59:57 +0000 Subject: [PATCH 011/333] Revert some changes --- src/Interpreters/InterpreterSelectQuery.cpp | 42 +++++++-------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6969a068949..48326afda45 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -94,9 +94,6 @@ #include #include #include -#include "Core/SettingsEnums.h" - -#include namespace DB { @@ -230,13 +227,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( InterpreterSelectQuery::~InterpreterSelectQuery() = default; -namespace -{ - /** There are no limits on the maximum size of the result for the subquery. * Since the result of the query is not the result of the entire query. 
*/ -ContextPtr getSubqueryContext(const ContextPtr & context) +static ContextPtr getSubqueryContext(const ContextPtr & context) { auto subquery_context = Context::createCopy(context); Settings subquery_settings = context->getSettings(); @@ -248,7 +242,7 @@ ContextPtr getSubqueryContext(const ContextPtr & context) return subquery_context; } -void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database, const Settings & settings) +static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database, const Settings & settings) { ASTSelectQuery & select = query->as(); @@ -268,7 +262,7 @@ void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, cons } /// Checks that the current user has the SELECT privilege. -void checkAccessRightsForSelect( +static void checkAccessRightsForSelect( const ContextPtr & context, const StorageID & table_id, const StorageMetadataPtr & table_metadata, @@ -298,7 +292,7 @@ void checkAccessRightsForSelect( context->checkAccess(AccessType::SELECT, table_id, syntax_analyzer_result.requiredSourceColumnsForAccessCheck()); } -ASTPtr parseAdditionalFilterConditionForTable( +static ASTPtr parseAdditionalFilterConditionForTable( const Map & setting, const DatabaseAndTableWithAlias & target, const Context & context) @@ -326,7 +320,7 @@ ASTPtr parseAdditionalFilterConditionForTable( } /// Returns true if we should ignore quotas and limits for a specified table in the system database. -bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) +static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) { if (table_id.database_name == DatabaseCatalog::SYSTEM_DATABASE) { @@ -337,8 +331,6 @@ bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) return false; } -} - InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const ContextPtr & context_, @@ -1417,23 +1409,17 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional
( - query_plan.getCurrentDataStream(), - new_filter_info->actions, - new_filter_info->column_name, - new_filter_info->do_remove_column); - - filter_step->setStepDescription(description); - query_plan.addStep(std::move(filter_step)); - }; - if (additional_filter_info) - add_filter_step(additional_filter_info, "Additional filter"); + { + auto additional_filter_step = std::make_unique( + query_plan.getCurrentDataStream(), + additional_filter_info->actions, + additional_filter_info->column_name, + additional_filter_info->do_remove_column); - if (parallel_replicas_custom_filter_info) - add_filter_step(parallel_replicas_custom_filter_info, "Parallel replica custom key filter"); + additional_filter_step->setStepDescription("Additional filter"); + query_plan.addStep(std::move(additional_filter_step)); + } if (expressions.before_array_join) { From 1c0a3e38c0adb78ee719310161fdf329393ded55 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 19 Jan 2023 08:13:59 +0000 Subject: [PATCH 012/333] Fix queries with Distributed storage --- .../ClusterProxy/executeQuery.cpp | 31 ++- src/Interpreters/ClusterProxy/executeQuery.h | 3 +- src/Storages/StorageDistributed.cpp | 236 ++++++++---------- 3 files changed, 137 insertions(+), 133 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index fc490306e08..d8d55b5486b 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include + namespace DB { @@ -128,7 +130,7 @@ void executeQuery( const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const ClusterPtr & not_optimized_cluster, - std::function add_additional_shard_filter = {}) + AdditionalShardFilterGenerator shard_filter_generator) { const Settings & settings = context->getSettingsRef(); @@ -180,8 +182,31 @@ void executeQuery( else query_ast_for_shard = query_ast->clone(); - if (add_additional_shard_filter) - add_additional_shard_filter(query_ast_for_shard, shard_info.shard_num); + if (shard_filter_generator) + { + auto shard_filter = shard_filter_generator(shard_info.shard_num); + if (shard_filter) + { + auto & select_query = query_ast_for_shard->as(); + + auto where_expression = select_query.where(); + if (where_expression) + { + ASTPtr args = std::make_shared(); + args->children.push_back(where_expression); + args->children.push_back(shard_filter); + + auto and_function = std::make_shared(); + and_function->name = "and"; + and_function->arguments = args; + and_function->children.push_back(and_function->arguments); + + shard_filter = std::move(and_function); + } + + select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(shard_filter)); + } + } stream_factory.createForShard(shard_info, query_ast_for_shard, main_table, table_func_ptr, diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 9519fa00efe..25827ebf29a 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -37,6 +37,7 @@ class SelectStreamFactory; ContextMutablePtr updateSettingsForCluster( const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr); +using AdditionalShardFilterGenerator = std::function; /// Execute a distributed 
query, creating a query plan, from which the query pipeline can be built. /// `stream_factory` object encapsulates the logic of creating plans for a different type of query /// (currently SELECT, DESCRIBE). @@ -51,7 +52,7 @@ void executeQuery( const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const ClusterPtr & not_optimized_cluster, - std::function add_additional_shard_filter); + AdditionalShardFilterGenerator shard_filter_generator = {}); void executeQueryWithParallelReplicas( diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 0b36c6c0026..a7c0dafaf62 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -758,22 +758,26 @@ void StorageDistributed::read( bool parallel_replicas = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas && !settings.use_hedged_requests && settings.parallel_replicas_mode == ParallelReplicasMode::READ_TASKS; - ParserExpression parser; - auto custom_key_ast = parseQuery( - parser, - settings.parallel_replicas_custom_key.value.data(), - settings.parallel_replicas_custom_key.value.data() + settings.parallel_replicas_custom_key.value.size(), - "parallel replicas custom key", - settings.max_query_size, - settings.max_parser_depth); - auto shard_count = query_info.getCluster()->getShardCount(); - std::function add_additional_shard_filter; + ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; if (settings.max_parallel_replicas > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) { - add_additional_shard_filter = [&](ASTPtr & query, uint64_t shard_num) + + const std::string_view custom_key = settings.parallel_replicas_custom_key.value; + assert(!custom_key.empty()); + + ParserExpression parser; + auto custom_key_ast = parseQuery( + parser, + custom_key.data(), + custom_key.data() + custom_key.size(), + "parallel replicas custom key", + settings.max_query_size, + settings.max_parser_depth); + + additional_shard_filter_generator = [&](uint64_t shard_num) -> ASTPtr { ParserExpression parser; auto custom_key_ast = parseQuery( @@ -803,130 +807,104 @@ void StorageDistributed::read( equals_function->arguments = args; equals_function->children.push_back(equals_function->arguments); - shard_filter = equals_function; + return equals_function; } else { assert(settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::RANGE); - auto filter_function = [&] + + KeyDescription custom_key_description = KeyDescription::getKeyFromAST(custom_key_ast, getInMemoryMetadataPtr()->columns, local_context); + + using RelativeSize = boost::rational; + + RelativeSize size_of_universum = 0; + DataTypePtr custom_key_column_type = custom_key_description.data_types[0]; + + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + if (custom_key_description.data_types.size() == 1) { - KeyDescription custom_key_description = KeyDescription::getKeyFromAST(custom_key_ast, getInMemoryMetadataPtr()->columns, local_context); - - using RelativeSize = boost::rational; - - RelativeSize size_of_universum = 0; - DataTypePtr custom_key_column_type = custom_key_description.data_types[0]; - - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - if (custom_key_description.data_types.size() == 1) - { - if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + 
RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - } - - if (size_of_universum == RelativeSize(0)) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Invalid custom key column type: {}. Must be one unsigned integer type", custom_key_column_type->getName()); - - RelativeSize relative_range_size = RelativeSize(1) / query_info.getCluster()->getShardCount(); - RelativeSize relative_range_offset = relative_range_size * RelativeSize(shard_num - 1); - - /// Calculate the half-interval of `[lower, upper)` column values. - bool has_lower_limit = false; - bool has_upper_limit = false; - - RelativeSize lower_limit_rational = relative_range_offset * size_of_universum; - RelativeSize upper_limit_rational = (relative_range_offset + relative_range_size) * size_of_universum; - - UInt64 lower = boost::rational_cast(lower_limit_rational); - UInt64 upper = boost::rational_cast(upper_limit_rational); - - if (lower > 0) - has_lower_limit = true; - - if (upper_limit_rational < size_of_universum) - has_upper_limit = true; - - assert(has_lower_limit || has_upper_limit); - - /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. - std::shared_ptr lower_function; - std::shared_ptr upper_function; - - if (has_lower_limit) - { - ASTPtr args = std::make_shared(); - args->children.push_back(custom_key_ast); - args->children.push_back(std::make_shared(lower)); - - lower_function = std::make_shared(); - lower_function->name = "greaterOrEquals"; - lower_function->arguments = args; - lower_function->children.push_back(lower_function->arguments); - - if (!has_upper_limit) - return lower_function; - } - - if (has_upper_limit) - { - ASTPtr args = std::make_shared(); - args->children.push_back(custom_key_ast); - args->children.push_back(std::make_shared(upper)); - - upper_function = std::make_shared(); - upper_function->name = "less"; - upper_function->arguments = args; - upper_function->children.push_back(upper_function->arguments); - - if (!has_lower_limit) - return upper_function; - } - - assert(has_lower_limit && has_upper_limit); - - ASTPtr args = std::make_shared(); - args->children.push_back(lower_function); - args->children.push_back(upper_function); - - auto and_function = std::make_shared(); - and_function->name = "and"; - and_function->arguments = args; - and_function->children.push_back(and_function->arguments); - - return and_function; - }; - - shard_filter = filter_function(); - } - - if (shard_filter) - { - auto & select_query = query->as(); - - auto where_expression = select_query.where(); - if (where_expression) - { - ASTPtr args = std::make_shared(); - args->children.push_back(where_expression); - args->children.push_back(shard_filter); - - auto and_function = std::make_shared(); - and_function->name = "and"; - and_function->arguments = args; - and_function->children.push_back(and_function->arguments); - - shard_filter = std::move(and_function); + if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = 
RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); } - select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(shard_filter)); + if (size_of_universum == RelativeSize(0)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Invalid custom key column type: {}. Must be one unsigned integer type", custom_key_column_type->getName()); + + RelativeSize relative_range_size = RelativeSize(1) / query_info.getCluster()->getShardCount(); + RelativeSize relative_range_offset = relative_range_size * RelativeSize(shard_num - 1); + + /// Calculate the half-interval of `[lower, upper)` column values. + bool has_lower_limit = false; + bool has_upper_limit = false; + + RelativeSize lower_limit_rational = relative_range_offset * size_of_universum; + RelativeSize upper_limit_rational = (relative_range_offset + relative_range_size) * size_of_universum; + + UInt64 lower = boost::rational_cast(lower_limit_rational); + UInt64 upper = boost::rational_cast(upper_limit_rational); + + if (lower > 0) + has_lower_limit = true; + + if (upper_limit_rational < size_of_universum) + has_upper_limit = true; + + assert(has_lower_limit || has_upper_limit); + + /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. + std::shared_ptr lower_function; + std::shared_ptr upper_function; + + if (has_lower_limit) + { + ASTPtr args = std::make_shared(); + args->children.push_back(custom_key_ast); + args->children.push_back(std::make_shared(lower)); + + lower_function = std::make_shared(); + lower_function->name = "greaterOrEquals"; + lower_function->arguments = args; + lower_function->children.push_back(lower_function->arguments); + + if (!has_upper_limit) + return lower_function; + } + + if (has_upper_limit) + { + ASTPtr args = std::make_shared(); + args->children.push_back(custom_key_ast); + args->children.push_back(std::make_shared(upper)); + + upper_function = std::make_shared(); + upper_function->name = "less"; + upper_function->arguments = args; + upper_function->children.push_back(upper_function->arguments); + + if (!has_lower_limit) + return upper_function; + } + + assert(has_lower_limit && has_upper_limit); + + ASTPtr args = std::make_shared(); + args->children.push_back(lower_function); + args->children.push_back(upper_function); + + auto and_function = std::make_shared(); + and_function->name = "and"; + and_function->arguments = args; + and_function->children.push_back(and_function->arguments); + + return and_function; } }; } @@ -945,7 +923,7 @@ void StorageDistributed::read( select_stream_factory, log, modified_query_ast, local_context, query_info, sharding_key_expr, sharding_key_column_name, - query_info.cluster, add_additional_shard_filter); + query_info.cluster, additional_shard_filter_generator); /// This is a bug, it is possible only when there is no shards to query, and this is handled earlier. 
if (!query_plan.isInitialized()) From 7a75144ce372da6134ae624668006c1348889d1e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 19 Jan 2023 09:20:40 +0000 Subject: [PATCH 013/333] Refactor --- src/Interpreters/Cluster.cpp | 11 +- src/Interpreters/Cluster.h | 4 +- .../ClusterProxy/executeQuery.cpp | 13 +- src/Storages/StorageDistributed.cpp | 207 +++++++----------- 4 files changed, 92 insertions(+), 143 deletions(-) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 9f0a9d3b35c..7e3e1baf6f2 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -653,9 +653,9 @@ void Cluster::initMisc() } } -std::unique_ptr Cluster::getClusterWithReplicasAsShards(const Settings & settings) const +std::unique_ptr Cluster::getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard) const { - return std::unique_ptr{ new Cluster(ReplicasAsShardsTag{}, *this, settings)}; + return std::unique_ptr{ new Cluster(ReplicasAsShardsTag{}, *this, settings, max_replicas_from_shard)}; } std::unique_ptr Cluster::getClusterWithSingleShard(size_t index) const @@ -668,7 +668,7 @@ std::unique_ptr Cluster::getClusterWithMultipleShards(const std::vector return std::unique_ptr{ new Cluster(SubclusterTag{}, *this, indices) }; } -Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings) +Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard) { if (from.addresses_with_failover.empty()) throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR); @@ -678,6 +678,7 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti for (size_t shard_index : collections::range(0, from.shards_info.size())) { const auto & replicas = from.addresses_with_failover[shard_index]; + size_t replicas_used = 0; for (const auto & address : replicas) { if (!unique_hosts.emplace(address.host_name, address.port).second) @@ -685,6 +686,7 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti ShardInfo info; info.shard_num = ++shard_num; + ++replicas_used; if (address.is_local) info.local_addresses.push_back(address); @@ -711,6 +713,9 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti addresses_with_failover.emplace_back(Addresses{address}); shards_info.emplace_back(std::move(info)); + + if (max_replicas_from_shard && replicas_used == max_replicas_from_shard) + break; } } diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index ada04aa1cae..77e87e48ca7 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -250,7 +250,7 @@ public: std::unique_ptr getClusterWithMultipleShards(const std::vector & indices) const; /// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards. - std::unique_ptr getClusterWithReplicasAsShards(const Settings & settings) const; + std::unique_ptr getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard = 0) const; /// Returns false if cluster configuration doesn't allow to use it for cross-replication. /// NOTE: true does not mean, that it's actually a cross-replication cluster. 
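
(Illustrative sketch, not ClickHouse code: the max_replicas_from_shard parameter added to getClusterWithReplicasAsShards above caps how many replicas of each original shard get promoted to single-replica shards; the StorageDistributed.cpp hunk later in this patch passes max_parallel_replicas for it. A minimal model with invented replica names:)

#include <iostream>
#include <string>
#include <vector>

using Shard = std::vector<std::string>;   // a shard modelled as the list of its replica names

// Promote replicas to single-replica shards, taking at most `max_replicas_from_shard`
// replicas from every original shard (0 keeps the old behaviour of taking them all).
std::vector<Shard> replicasAsShards(const std::vector<Shard> & shards, size_t max_replicas_from_shard)
{
    std::vector<Shard> result;
    for (const auto & shard : shards)
    {
        size_t replicas_used = 0;
        for (const auto & replica : shard)
        {
            result.push_back({replica});
            if (max_replicas_from_shard && ++replicas_used == max_replicas_from_shard)
                break;
        }
    }
    return result;
}

int main()
{
    const std::vector<Shard> cluster{{"replica1", "replica2", "replica3"}};   // one shard, three replicas
    for (const auto & new_shard : replicasAsShards(cluster, 2))               // max_parallel_replicas = 2
        std::cout << new_shard.front() << " becomes a shard of its own\n";
}
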
@@ -271,7 +271,7 @@ private: /// For getClusterWithReplicasAsShards implementation struct ReplicasAsShardsTag {}; - Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings); + Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard); /// Inter-server secret String secret; diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index d8d55b5486b..02673b9f7ac 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -191,18 +191,7 @@ void executeQuery( auto where_expression = select_query.where(); if (where_expression) - { - ASTPtr args = std::make_shared(); - args->children.push_back(where_expression); - args->children.push_back(shard_filter); - - auto and_function = std::make_shared(); - and_function->name = "and"; - and_function->arguments = args; - and_function->children.push_back(and_function->arguments); - - shard_filter = std::move(and_function); - } + shard_filter = makeASTFunction("and", where_expression, shard_filter); select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(shard_filter)); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a7c0dafaf62..82c38868cb4 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -445,10 +445,6 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( ClusterPtr cluster = getCluster(); - // if it's custom_key we will turn replicas into shards and filter specific data on each of them - if (settings.max_parallel_replicas > 1 && cluster->getShardCount() == 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) - cluster = cluster->getClusterWithReplicasAsShards(settings); - query_info.cluster = cluster; size_t nodes = getClusterQueriedNodes(settings, cluster); @@ -758,15 +754,19 @@ void StorageDistributed::read( bool parallel_replicas = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas && !settings.use_hedged_requests && settings.parallel_replicas_mode == ParallelReplicasMode::READ_TASKS; - auto shard_count = query_info.getCluster()->getShardCount(); - ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; if (settings.max_parallel_replicas > 1 - && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) + && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY + && getCluster()->getShardCount() == 1) { + LOG_INFO(log, "Single shard cluster used with custom_key, transforming replicas into shards"); + + query_info.cluster = getCluster()->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas); + query_info.optimized_cluster = nullptr; // it's a single shard cluster so nothing could've been optimized const std::string_view custom_key = settings.parallel_replicas_custom_key.value; - assert(!custom_key.empty()); + if (custom_key.empty()) + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but 'parallel_replicas_custom_key' has no value"); ParserExpression parser; auto custom_key_ast = parseQuery( @@ -777,135 +777,90 @@ void StorageDistributed::read( settings.max_query_size, settings.max_parser_depth); - additional_shard_filter_generator = [&](uint64_t shard_num) -> ASTPtr + additional_shard_filter_generator = [&, custom_key_ast = std::move(custom_key_ast), shard_count = 
query_info.cluster->getShardCount()](uint64_t shard_num) mutable -> ASTPtr { - ParserExpression parser; - auto custom_key_ast = parseQuery( - parser, settings.parallel_replicas_custom_key.value.data(), settings.parallel_replicas_custom_key.value.data() + settings.parallel_replicas_custom_key.value.size(), - "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); - - ASTPtr shard_filter = nullptr ; if (settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) { // first we do modulo with replica count - ASTPtr args = std::make_shared(); - args->children.push_back(custom_key_ast); - args->children.push_back(std::make_shared(shard_count)); - - auto modulo_function = std::make_shared(); - modulo_function->name = "positiveModulo"; - modulo_function->arguments = args; - modulo_function->children.push_back(modulo_function->arguments); + auto modulo_function = makeASTFunction("positiveModulo", custom_key_ast, std::make_shared(shard_count)); /// then we compare result to the current replica number (offset) - args = std::make_shared(); - args->children.push_back(modulo_function); - args->children.push_back(std::make_shared(shard_num - 1)); - - auto equals_function = std::make_shared(); - equals_function->name = "equals"; - equals_function->arguments = args; - equals_function->children.push_back(equals_function->arguments); + auto equals_function = makeASTFunction("equals", std::move(modulo_function), std::make_shared(shard_num - 1)); return equals_function; } - else + + assert(settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::RANGE); + + KeyDescription custom_key_description = KeyDescription::getKeyFromAST(custom_key_ast, getInMemoryMetadataPtr()->columns, local_context); + + using RelativeSize = boost::rational; + + RelativeSize size_of_universum = 0; + DataTypePtr custom_key_column_type = custom_key_description.data_types[0]; + + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + if (custom_key_description.data_types.size() == 1) { - assert(settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::RANGE); - - KeyDescription custom_key_description = KeyDescription::getKeyFromAST(custom_key_ast, getInMemoryMetadataPtr()->columns, local_context); - - using RelativeSize = boost::rational; - - RelativeSize size_of_universum = 0; - DataTypePtr custom_key_column_type = custom_key_description.data_types[0]; - - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - if (custom_key_description.data_types.size() == 1) - { - if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - } - - if (size_of_universum == RelativeSize(0)) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Invalid custom key column type: {}. 
Must be one unsigned integer type", custom_key_column_type->getName()); - - RelativeSize relative_range_size = RelativeSize(1) / query_info.getCluster()->getShardCount(); - RelativeSize relative_range_offset = relative_range_size * RelativeSize(shard_num - 1); - - /// Calculate the half-interval of `[lower, upper)` column values. - bool has_lower_limit = false; - bool has_upper_limit = false; - - RelativeSize lower_limit_rational = relative_range_offset * size_of_universum; - RelativeSize upper_limit_rational = (relative_range_offset + relative_range_size) * size_of_universum; - - UInt64 lower = boost::rational_cast(lower_limit_rational); - UInt64 upper = boost::rational_cast(upper_limit_rational); - - if (lower > 0) - has_lower_limit = true; - - if (upper_limit_rational < size_of_universum) - has_upper_limit = true; - - assert(has_lower_limit || has_upper_limit); - - /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. - std::shared_ptr lower_function; - std::shared_ptr upper_function; - - if (has_lower_limit) - { - ASTPtr args = std::make_shared(); - args->children.push_back(custom_key_ast); - args->children.push_back(std::make_shared(lower)); - - lower_function = std::make_shared(); - lower_function->name = "greaterOrEquals"; - lower_function->arguments = args; - lower_function->children.push_back(lower_function->arguments); - - if (!has_upper_limit) - return lower_function; - } - - if (has_upper_limit) - { - ASTPtr args = std::make_shared(); - args->children.push_back(custom_key_ast); - args->children.push_back(std::make_shared(upper)); - - upper_function = std::make_shared(); - upper_function->name = "less"; - upper_function->arguments = args; - upper_function->children.push_back(upper_function->arguments); - - if (!has_lower_limit) - return upper_function; - } - - assert(has_lower_limit && has_upper_limit); - - ASTPtr args = std::make_shared(); - args->children.push_back(lower_function); - args->children.push_back(upper_function); - - auto and_function = std::make_shared(); - and_function->name = "and"; - and_function->arguments = args; - and_function->children.push_back(and_function->arguments); - - return and_function; + if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); } + + if (size_of_universum == RelativeSize(0)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Invalid custom key column type: {}. Must be one unsigned integer type", custom_key_column_type->getName()); + + RelativeSize relative_range_size = RelativeSize(1) / shard_count; + RelativeSize relative_range_offset = relative_range_size * RelativeSize(shard_num - 1); + + /// Calculate the half-interval of `[lower, upper)` column values. 
+ bool has_lower_limit = false; + bool has_upper_limit = false; + + RelativeSize lower_limit_rational = relative_range_offset * size_of_universum; + RelativeSize upper_limit_rational = (relative_range_offset + relative_range_size) * size_of_universum; + + UInt64 lower = boost::rational_cast(lower_limit_rational); + UInt64 upper = boost::rational_cast(upper_limit_rational); + + if (lower > 0) + has_lower_limit = true; + + if (upper_limit_rational < size_of_universum) + has_upper_limit = true; + + assert(has_lower_limit || has_upper_limit); + + /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. + std::shared_ptr lower_function; + std::shared_ptr upper_function; + + if (has_lower_limit) + { + lower_function = makeASTFunction("greaterOrEquals", custom_key_ast, std::make_shared(lower)); + + if (!has_upper_limit) + return lower_function; + } + + if (has_upper_limit) + { + upper_function = makeASTFunction("less", custom_key_ast, std::make_shared(upper)); + + if (!has_lower_limit) + return upper_function; + } + + assert(upper_function && lower_function); + + return makeASTFunction("and", std::move(lower_function), std::move(upper_function)); }; } From 3b0c63551ec244ae1d9ff38e70c04c98b5342b7d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 19 Jan 2023 10:26:38 +0000 Subject: [PATCH 014/333] Combine approaches --- src/Interpreters/InterpreterSelectQuery.cpp | 84 +++++++++++--- .../getCustomKeyFilterForParallelReplicas.cpp | 109 ++++++++++++++++++ .../getCustomKeyFilterForParallelReplicas.h | 21 ++++ src/Storages/StorageDistributed.cpp | 89 +------------- 4 files changed, 206 insertions(+), 97 deletions(-) create mode 100644 src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp create mode 100644 src/Interpreters/getCustomKeyFilterForParallelReplicas.h diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 48326afda45..3612458196c 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -94,6 +95,9 @@ #include #include #include +#include "Core/SettingsEnums.h" + +#include namespace DB { @@ -112,6 +116,8 @@ namespace ErrorCodes extern const int INVALID_WITH_FILL_EXPRESSION; extern const int ACCESS_DENIED; extern const int UNKNOWN_IDENTIFIER; + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } /// Assumes `storage` is set and the table filter (row-level security) is not empty. @@ -227,10 +233,13 @@ InterpreterSelectQuery::InterpreterSelectQuery( InterpreterSelectQuery::~InterpreterSelectQuery() = default; +namespace +{ + /** There are no limits on the maximum size of the result for the subquery. * Since the result of the query is not the result of the entire query. 
*/ -static ContextPtr getSubqueryContext(const ContextPtr & context) +ContextPtr getSubqueryContext(const ContextPtr & context) { auto subquery_context = Context::createCopy(context); Settings subquery_settings = context->getSettings(); @@ -242,7 +251,7 @@ static ContextPtr getSubqueryContext(const ContextPtr & context) return subquery_context; } -static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database, const Settings & settings) +void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database, const Settings & settings) { ASTSelectQuery & select = query->as(); @@ -262,7 +271,7 @@ static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & table } /// Checks that the current user has the SELECT privilege. -static void checkAccessRightsForSelect( +void checkAccessRightsForSelect( const ContextPtr & context, const StorageID & table_id, const StorageMetadataPtr & table_metadata, @@ -292,7 +301,7 @@ static void checkAccessRightsForSelect( context->checkAccess(AccessType::SELECT, table_id, syntax_analyzer_result.requiredSourceColumnsForAccessCheck()); } -static ASTPtr parseAdditionalFilterConditionForTable( +ASTPtr parseAdditionalFilterConditionForTable( const Map & setting, const DatabaseAndTableWithAlias & target, const Context & context) @@ -319,8 +328,20 @@ static ASTPtr parseAdditionalFilterConditionForTable( return nullptr; } +ASTPtr parseParallelReplicaCustomKey(const String & setting, const Context & context) +{ + if (setting.empty()) + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but 'parallel_replicas_custom_key' has no value"); + + ParserExpression parser; + const auto & settings = context.getSettingsRef(); + return parseQuery( + parser, setting.data(), setting.data() + setting.size(), + "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); +} + /// Returns true if we should ignore quotas and limits for a specified table in the system database. 
-static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) +bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) { if (table_id.database_name == DatabaseCatalog::SYSTEM_DATABASE) { @@ -331,6 +352,8 @@ static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) return false; } +} + InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const ContextPtr & context_, @@ -501,7 +524,24 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.additional_filter_ast = parseAdditionalFilterConditionForTable( settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); - auto analyze = [&] (bool try_move_to_prewhere) + ASTPtr parallel_replicas_custom_filter_ast = nullptr; + if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) + { + LOG_INFO(log, "Processing query on a replica using custom_key"); + if (!storage) + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Storage is unknown when trying to parse custom key for parallel replica"); + + auto custom_key_ast = parseParallelReplicaCustomKey(settings.parallel_replicas_custom_key, *context); + parallel_replicas_custom_filter_ast = getCustomKeyFilterForParallelReplica( + settings.parallel_replicas_count, + settings.parallel_replica_offset, + std::move(custom_key_ast), + settings.parallel_replicas_custom_key_filter_type, + *storage, + context); + } + + auto analyze = [&](bool try_move_to_prewhere) { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; @@ -666,6 +706,16 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.filter_asts.push_back(query_info.additional_filter_ast); } + if (parallel_replicas_custom_filter_ast) + { + parallel_replicas_custom_filter_info = generateFilterActions( + table_id, parallel_replicas_custom_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns, + prepared_sets); + + parallel_replicas_custom_filter_info->do_remove_column = true; + query_info.filter_asts.push_back(parallel_replicas_custom_filter_ast); + } + source_header = storage_snapshot->getSampleBlockForColumns(required_columns, parameter_values); } @@ -1409,17 +1459,23 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional
( + auto filter_step = std::make_unique( query_plan.getCurrentDataStream(), - additional_filter_info->actions, - additional_filter_info->column_name, - additional_filter_info->do_remove_column); + new_filter_info->actions, + new_filter_info->column_name, + new_filter_info->do_remove_column); - additional_filter_step->setStepDescription("Additional filter"); - query_plan.addStep(std::move(additional_filter_step)); - } + filter_step->setStepDescription(description); + query_plan.addStep(std::move(filter_step)); + }; + + if (additional_filter_info) + add_filter_step(additional_filter_info, "Additional filter"); + + if (parallel_replicas_custom_filter_info) + add_filter_step(parallel_replicas_custom_filter_info, "Parallel replica custom key filter"); if (expressions.before_array_join) { diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp new file mode 100644 index 00000000000..8e7fa8386fe --- /dev/null +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -0,0 +1,109 @@ +#include + +#include +#include + +#include + +#include + + +namespace DB +{ + +ASTPtr getCustomKeyFilterForParallelReplica( + size_t replicas_count, + size_t replica_num, + ASTPtr custom_key_ast, + ParallelReplicasCustomKeyFilterType filter_type, + const IStorage & storage, + const ContextPtr & context) +{ + assert(replicas_count > 1); + if (filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) + { + // first we do modulo with replica count + auto modulo_function = makeASTFunction("positiveModulo", custom_key_ast, std::make_shared(replicas_count)); + + /// then we compare result to the current replica number (offset) + auto equals_function = makeASTFunction("equals", std::move(modulo_function), std::make_shared(replica_num)); + + return equals_function; + } + + assert(filter_type == ParallelReplicasCustomKeyFilterType::RANGE); + + KeyDescription custom_key_description + = KeyDescription::getKeyFromAST(custom_key_ast, storage.getInMemoryMetadataPtr()->columns, context); + + using RelativeSize = boost::rational; + + RelativeSize size_of_universum = 0; + DataTypePtr custom_key_column_type = custom_key_description.data_types[0]; + + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + if (custom_key_description.data_types.size() == 1) + { + if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + else if (typeid_cast(custom_key_column_type.get())) + size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + } + + if (size_of_universum == RelativeSize(0)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Invalid custom key column type: {}. Must be one unsigned integer type", + custom_key_column_type->getName()); + + RelativeSize relative_range_size = RelativeSize(1) / replicas_count; + RelativeSize relative_range_offset = relative_range_size * RelativeSize(replica_num); + + /// Calculate the half-interval of `[lower, upper)` column values. 
+ bool has_lower_limit = false; + bool has_upper_limit = false; + + RelativeSize lower_limit_rational = relative_range_offset * size_of_universum; + RelativeSize upper_limit_rational = (relative_range_offset + relative_range_size) * size_of_universum; + + UInt64 lower = boost::rational_cast(lower_limit_rational); + UInt64 upper = boost::rational_cast(upper_limit_rational); + + if (lower > 0) + has_lower_limit = true; + + if (upper_limit_rational < size_of_universum) + has_upper_limit = true; + + assert(has_lower_limit || has_upper_limit); + + /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. + std::shared_ptr lower_function; + std::shared_ptr upper_function; + + if (has_lower_limit) + { + lower_function = makeASTFunction("greaterOrEquals", custom_key_ast, std::make_shared(lower)); + + if (!has_upper_limit) + return lower_function; + } + + if (has_upper_limit) + { + upper_function = makeASTFunction("less", custom_key_ast, std::make_shared(upper)); + + if (!has_lower_limit) + return upper_function; + } + + assert(upper_function && lower_function); + + return makeASTFunction("and", std::move(lower_function), std::move(upper_function)); +} + +} diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h new file mode 100644 index 00000000000..3d830d1d606 --- /dev/null +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include +#include +#include "Core/SettingsEnums.h" + +namespace DB +{ + +/// Get AST for filter created from custom_key +/// replica_num is the number of the replica for which we are generating filter starting from 0 +ASTPtr getCustomKeyFilterForParallelReplica( + size_t replicas_count, + size_t replica_num, + ASTPtr custom_key_ast, + ParallelReplicasCustomKeyFilterType filter_type, + const IStorage & storage, + const ContextPtr & context); + +} diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 82c38868cb4..a0aa646869f 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -61,6 +61,8 @@ #include #include #include +#include + #include #include #include @@ -777,90 +779,11 @@ void StorageDistributed::read( settings.max_query_size, settings.max_parser_depth); - additional_shard_filter_generator = [&, custom_key_ast = std::move(custom_key_ast), shard_count = query_info.cluster->getShardCount()](uint64_t shard_num) mutable -> ASTPtr + additional_shard_filter_generator = + [&, custom_key_ast = std::move(custom_key_ast), shard_count = query_info.cluster->getShardCount()](uint64_t shard_num) -> ASTPtr { - if (settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) - { - // first we do modulo with replica count - auto modulo_function = makeASTFunction("positiveModulo", custom_key_ast, std::make_shared(shard_count)); - - /// then we compare result to the current replica number (offset) - auto equals_function = makeASTFunction("equals", std::move(modulo_function), std::make_shared(shard_num - 1)); - - return equals_function; - } - - assert(settings.parallel_replicas_custom_key_filter_type == ParallelReplicasCustomKeyFilterType::RANGE); - - KeyDescription custom_key_description = KeyDescription::getKeyFromAST(custom_key_ast, getInMemoryMetadataPtr()->columns, local_context); - - using RelativeSize = boost::rational; - - RelativeSize size_of_universum = 0; - DataTypePtr 
custom_key_column_type = custom_key_description.data_types[0]; - - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - if (custom_key_description.data_types.size() == 1) - { - if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); - } - - if (size_of_universum == RelativeSize(0)) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Invalid custom key column type: {}. Must be one unsigned integer type", custom_key_column_type->getName()); - - RelativeSize relative_range_size = RelativeSize(1) / shard_count; - RelativeSize relative_range_offset = relative_range_size * RelativeSize(shard_num - 1); - - /// Calculate the half-interval of `[lower, upper)` column values. - bool has_lower_limit = false; - bool has_upper_limit = false; - - RelativeSize lower_limit_rational = relative_range_offset * size_of_universum; - RelativeSize upper_limit_rational = (relative_range_offset + relative_range_size) * size_of_universum; - - UInt64 lower = boost::rational_cast(lower_limit_rational); - UInt64 upper = boost::rational_cast(upper_limit_rational); - - if (lower > 0) - has_lower_limit = true; - - if (upper_limit_rational < size_of_universum) - has_upper_limit = true; - - assert(has_lower_limit || has_upper_limit); - - /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. 
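
The 'default' filter type removed here (and now produced by the shared helper) boils down to positiveModulo(custom_key, replicas_count) = offset, where the 1-based shard_num coming from the Distributed layer becomes a 0-based offset via shard_num - 1. A small illustrative sketch of that mapping (not the patch's code), assuming an unsigned key, which in practice is usually a hash such as cityHash64(x):

    #include <cstdint>
    #include <iostream>

    /// True if the given virtual shard (1-based, as passed by the Distributed layer)
    /// owns a row with this custom key value under the 'default' filter type.
    bool defaultFilterMatches(uint64_t custom_key_value, uint64_t replicas_count, uint64_t shard_num)
    {
        const uint64_t replica_offset = shard_num - 1;                /// 0-based offset used in the predicate
        return custom_key_value % replicas_count == replica_offset;  /// positiveModulo(key, count) = offset
    }

    int main()
    {
        const uint64_t replicas = 3;
        for (uint64_t key = 0; key < 6; ++key)
            for (uint64_t shard_num = 1; shard_num <= replicas; ++shard_num)
                if (defaultFilterMatches(key, replicas, shard_num))
                    std::cout << "key " << key << " -> virtual shard " << shard_num << '\n';
    }

Every key value matches exactly one replica's predicate, so the replicas together read each row exactly once.
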
- std::shared_ptr lower_function; - std::shared_ptr upper_function; - - if (has_lower_limit) - { - lower_function = makeASTFunction("greaterOrEquals", custom_key_ast, std::make_shared(lower)); - - if (!has_upper_limit) - return lower_function; - } - - if (has_upper_limit) - { - upper_function = makeASTFunction("less", custom_key_ast, std::make_shared(upper)); - - if (!has_lower_limit) - return upper_function; - } - - assert(upper_function && lower_function); - - return makeASTFunction("and", std::move(lower_function), std::move(upper_function)); + return getCustomKeyFilterForParallelReplica( + shard_count, shard_num - 1, custom_key_ast, settings.parallel_replicas_custom_key_filter_type, *this, local_context); }; } From ddfb913f9991b4e410744ebb0eac260d842df4e2 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 19 Jan 2023 11:28:26 +0000 Subject: [PATCH 015/333] better --- src/Interpreters/InterpreterSelectQuery.cpp | 18 +---- .../getCustomKeyFilterForParallelReplicas.cpp | 23 +++++- .../getCustomKeyFilterForParallelReplicas.h | 4 +- src/Storages/StorageDistributed.cpp | 75 ++++++++++--------- 4 files changed, 64 insertions(+), 56 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 3612458196c..0aa48ca1998 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -95,9 +95,6 @@ #include #include #include -#include "Core/SettingsEnums.h" - -#include namespace DB { @@ -117,7 +114,6 @@ namespace ErrorCodes extern const int ACCESS_DENIED; extern const int UNKNOWN_IDENTIFIER; extern const int BAD_ARGUMENTS; - extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } /// Assumes `storage` is set and the table filter (row-level security) is not empty. @@ -328,18 +324,6 @@ ASTPtr parseAdditionalFilterConditionForTable( return nullptr; } -ASTPtr parseParallelReplicaCustomKey(const String & setting, const Context & context) -{ - if (setting.empty()) - throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but 'parallel_replicas_custom_key' has no value"); - - ParserExpression parser; - const auto & settings = context.getSettingsRef(); - return parseQuery( - parser, setting.data(), setting.data() + setting.size(), - "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); -} - /// Returns true if we should ignore quotas and limits for a specified table in the system database. 
bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) { @@ -531,7 +515,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!storage) throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Storage is unknown when trying to parse custom key for parallel replica"); - auto custom_key_ast = parseParallelReplicaCustomKey(settings.parallel_replicas_custom_key, *context); + auto custom_key_ast = parseParallelReplicaCustomKey(settings.parallel_replicas_custom_key.value, *context); parallel_replicas_custom_filter_ast = getCustomKeyFilterForParallelReplica( settings.parallel_replicas_count, settings.parallel_replica_offset, diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index 8e7fa8386fe..306ca129e16 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -2,6 +2,10 @@ #include #include +#include +#include + +#include #include @@ -11,6 +15,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; +} + ASTPtr getCustomKeyFilterForParallelReplica( size_t replicas_count, size_t replica_num, @@ -56,7 +66,7 @@ ASTPtr getCustomKeyFilterForParallelReplica( if (size_of_universum == RelativeSize(0)) throw Exception( - ErrorCodes::LOGICAL_ERROR, + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid custom key column type: {}. Must be one unsigned integer type", custom_key_column_type->getName()); @@ -106,4 +116,15 @@ ASTPtr getCustomKeyFilterForParallelReplica( return makeASTFunction("and", std::move(lower_function), std::move(upper_function)); } +ASTPtr parseParallelReplicaCustomKey(std::string_view custom_key, const Context & context) +{ + if (custom_key.empty()) + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but 'parallel_replicas_custom_key' has no value"); + + ParserExpression parser; + const auto & settings = context.getSettingsRef(); + return parseQuery( + parser, custom_key.data(), custom_key.data() + custom_key.size(), + "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); +} } diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h index 3d830d1d606..251e72e0a97 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h @@ -3,7 +3,7 @@ #include #include #include -#include "Core/SettingsEnums.h" +#include namespace DB { @@ -18,4 +18,6 @@ ASTPtr getCustomKeyFilterForParallelReplica( const IStorage & storage, const ContextPtr & context); +ASTPtr parseParallelReplicaCustomKey(std::string_view custom_key, const Context & context); + } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a0aa646869f..e66a363003e 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -135,7 +135,6 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int UNSUPPORTED_METHOD; - extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } namespace ActionLocks @@ -447,29 +446,40 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( ClusterPtr cluster = getCluster(); - query_info.cluster = cluster; - size_t nodes = getClusterQueriedNodes(settings, cluster); - /// Always calculate optimized cluster 
here, to avoid conditions during read() - /// (Anyway it will be calculated in the read()) - if (nodes > 1 && settings.optimize_skip_unused_shards) + if (settings.max_parallel_replicas > 1 + && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY + && cluster->getShardCount() == 1) { - ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info.query); - if (optimized_cluster) - { - LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", - makeFormattedListOfShards(optimized_cluster)); + LOG_INFO(log, "Single shard cluster used with custom_key, transforming replicas into virtual shards"); - cluster = optimized_cluster; - query_info.optimized_cluster = cluster; + query_info.cluster = cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas); + } + else + { + query_info.cluster = cluster; - nodes = getClusterQueriedNodes(settings, cluster); - } - else + if (nodes > 1 && settings.optimize_skip_unused_shards) { - LOG_DEBUG(log, "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster{}", - has_sharding_key ? "" : " (no sharding key)"); + /// Always calculate optimized cluster here, to avoid conditions during read() + /// (Anyway it will be calculated in the read()) + ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info.query); + if (optimized_cluster) + { + LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", + makeFormattedListOfShards(optimized_cluster)); + + cluster = optimized_cluster; + query_info.optimized_cluster = cluster; + + nodes = getClusterQueriedNodes(settings, cluster); + } + else + { + LOG_DEBUG(log, "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster{}", + has_sharding_key ? 
"" : " (no sharding key)"); + } } } @@ -751,33 +761,21 @@ void StorageDistributed::read( storage_snapshot, processed_stage); - auto settings = local_context->getSettingsRef(); - bool parallel_replicas = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas - && !settings.use_hedged_requests && settings.parallel_replicas_mode == ParallelReplicasMode::READ_TASKS; ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; if (settings.max_parallel_replicas > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY && getCluster()->getShardCount() == 1) { - LOG_INFO(log, "Single shard cluster used with custom_key, transforming replicas into shards"); + if (query_info.getCluster()->getShardCount() == 1) + { + // we are reading from single shard replica but didn't transform replicas + // into virtual shards with custom_key set + throw Exception(ErrorCodes::LOGICAL_ERROR, "Replicas weren't transformed into virtual shards"); + } - query_info.cluster = getCluster()->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas); - query_info.optimized_cluster = nullptr; // it's a single shard cluster so nothing could've been optimized - - const std::string_view custom_key = settings.parallel_replicas_custom_key.value; - if (custom_key.empty()) - throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but 'parallel_replicas_custom_key' has no value"); - - ParserExpression parser; - auto custom_key_ast = parseQuery( - parser, - custom_key.data(), - custom_key.data() + custom_key.size(), - "parallel replicas custom key", - settings.max_query_size, - settings.max_parser_depth); + auto custom_key_ast = parseParallelReplicaCustomKey(settings.parallel_replicas_custom_key.value, *local_context); additional_shard_filter_generator = [&, custom_key_ast = std::move(custom_key_ast), shard_count = query_info.cluster->getShardCount()](uint64_t shard_num) -> ASTPtr @@ -787,6 +785,9 @@ void StorageDistributed::read( }; } + bool parallel_replicas = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas + && !settings.use_hedged_requests && settings.parallel_replicas_mode == ParallelReplicasMode::READ_TASKS; + if (parallel_replicas) ClusterProxy::executeQueryWithParallelReplicas( query_plan, main_table, remote_table_function_ptr, From 53b53a1ec90c385c2b68806e4a7f4078ecf64945 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 19 Jan 2023 12:04:07 +0000 Subject: [PATCH 016/333] Add replcia shuffling --- src/Interpreters/Cluster.cpp | 92 ++++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 31 deletions(-) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 7e3e1baf6f2..97970691025 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -15,6 +15,7 @@ #include #include +#include namespace DB { @@ -677,45 +678,74 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti std::set> unique_hosts; for (size_t shard_index : collections::range(0, from.shards_info.size())) { - const auto & replicas = from.addresses_with_failover[shard_index]; - size_t replicas_used = 0; - for (const auto & address : replicas) + auto create_shards_from_replicas = [&](std::span replicas) { - if (!unique_hosts.emplace(address.host_name, address.port).second) - continue; /// Duplicate host, skip. 
+ for (const auto & address : replicas) + { + if (!unique_hosts.emplace(address.host_name, address.port).second) + continue; /// Duplicate host, skip. - ShardInfo info; - info.shard_num = ++shard_num; - ++replicas_used; + ShardInfo info; + info.shard_num = ++shard_num; - if (address.is_local) - info.local_addresses.push_back(address); + if (address.is_local) + info.local_addresses.push_back(address); - info.all_addresses.push_back(address); + info.all_addresses.push_back(address); - auto pool = ConnectionPoolFactory::instance().get( - static_cast(settings.distributed_connections_pool_size), - address.host_name, - address.port, - address.default_database, - address.user, - address.password, - address.quota_key, - address.cluster, - address.cluster_secret, - "server", - address.compression, - address.secure, - address.priority); + auto pool = ConnectionPoolFactory::instance().get( + static_cast(settings.distributed_connections_pool_size), + address.host_name, + address.port, + address.default_database, + address.user, + address.password, + address.quota_key, + address.cluster, + address.cluster_secret, + "server", + address.compression, + address.secure, + address.priority); - info.pool = std::make_shared(ConnectionPoolPtrs{pool}, settings.load_balancing); - info.per_replica_pools = {std::move(pool)}; + info.pool = std::make_shared(ConnectionPoolPtrs{pool}, settings.load_balancing); + info.per_replica_pools = {std::move(pool)}; - addresses_with_failover.emplace_back(Addresses{address}); - shards_info.emplace_back(std::move(info)); + addresses_with_failover.emplace_back(Addresses{address}); + shards_info.emplace_back(std::move(info)); + } + }; - if (max_replicas_from_shard && replicas_used == max_replicas_from_shard) - break; + const auto & replicas = from.addresses_with_failover[shard_index]; + if (!max_replicas_from_shard || replicas.size() <= max_replicas_from_shard) + { + create_shards_from_replicas(replicas); + } + else + { + std::random_device rd; + std::mt19937 gen{rd()}; + auto shuffled_replicas = replicas; + + if (settings.prefer_localhost_replica) + { + auto local_replica = std::find_if(shuffled_replicas.begin(), shuffled_replicas.end(), [](const auto & replica) { return replica.is_local; }); + if (local_replica != shuffled_replicas.end()) + { + std::swap(*shuffled_replicas.begin(), *local_replica); + std::shuffle(shuffled_replicas.begin() + 1, shuffled_replicas.end(), gen); + } + else + { + std::shuffle(shuffled_replicas.begin(), shuffled_replicas.end(), gen); + } + } + else + { + std::shuffle(shuffled_replicas.begin(), shuffled_replicas.end(), gen); + } + + create_shards_from_replicas(std::span{shuffled_replicas.begin(), shuffled_replicas.begin() + max_replicas_from_shard}); } } From d8adff35c3e1851e3f85ffa0ffa3780b0114bd51 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 19 Jan 2023 12:08:32 +0000 Subject: [PATCH 017/333] Fix style --- src/Interpreters/Cluster.cpp | 6 +++++- src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 97970691025..9661b3f4e19 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -723,16 +723,20 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti } else { + // shuffle replicas so we don't always pick the same subset std::random_device rd; std::mt19937 gen{rd()}; auto shuffled_replicas = replicas; if (settings.prefer_localhost_replica) { + // force for 
local replica to always be included auto local_replica = std::find_if(shuffled_replicas.begin(), shuffled_replicas.end(), [](const auto & replica) { return replica.is_local; }); if (local_replica != shuffled_replicas.end()) { - std::swap(*shuffled_replicas.begin(), *local_replica); + if (local_replica != shuffled_replicas.begin()) + std::swap(*shuffled_replicas.begin(), *local_replica); + std::shuffle(shuffled_replicas.begin() + 1, shuffled_replicas.end(), gen); } else diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index 306ca129e16..fbc1d214164 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -17,8 +17,8 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; + extern const int BAD_ARGUMENTS; } ASTPtr getCustomKeyFilterForParallelReplica( From d93cb3e1ddb5192f208e259fa57d4ccd5f29dffa Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 19 Jan 2023 13:24:35 +0000 Subject: [PATCH 018/333] More correct check --- src/Storages/StorageDistributed.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index e66a363003e..ea7cb9d33cb 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -307,6 +307,11 @@ size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & clus return (num_remote_shards + num_local_shards) * settings.max_parallel_replicas; } +bool useVirtualShards(const Settings & settings, const Cluster & cluster) +{ + return settings.max_parallel_replicas > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY + && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; +} } @@ -448,9 +453,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( size_t nodes = getClusterQueriedNodes(settings, cluster); - if (settings.max_parallel_replicas > 1 - && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY - && cluster->getShardCount() == 1) + if (useVirtualShards(settings, *cluster)) { LOG_INFO(log, "Single shard cluster used with custom_key, transforming replicas into virtual shards"); @@ -764,13 +767,11 @@ void StorageDistributed::read( auto settings = local_context->getSettingsRef(); ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; - if (settings.max_parallel_replicas > 1 - && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY - && getCluster()->getShardCount() == 1) + if (useVirtualShards(settings, *getCluster())) { if (query_info.getCluster()->getShardCount() == 1) { - // we are reading from single shard replica but didn't transform replicas + // we are reading from single shard with multiple replicas but didn't transform replicas // into virtual shards with custom_key set throw Exception(ErrorCodes::LOGICAL_ERROR, "Replicas weren't transformed into virtual shards"); } From 6394a004fb8014f81baf98b500163a779640478c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 19 Jan 2023 14:47:58 +0000 Subject: [PATCH 019/333] Refactor --- src/Interpreters/Cluster.cpp | 54 +++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 9661b3f4e19..76609cf1a5b 
100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -669,6 +669,33 @@ std::unique_ptr Cluster::getClusterWithMultipleShards(const std::vector return std::unique_ptr{ new Cluster(SubclusterTag{}, *this, indices) }; } +namespace +{ + +void shuffleReplicas(auto & replicas, const Settings & settings) +{ + std::random_device rd; + std::mt19937 gen{rd()}; + + if (settings.prefer_localhost_replica) + { + // force for local replica to always be included + auto local_replica = std::find_if(replicas.begin(), replicas.end(), [](const auto & replica) { return replica.is_local; }); + if (local_replica != replicas.end()) + { + if (local_replica != replicas.begin()) + std::swap(*replicas.begin(), *local_replica); + + std::shuffle(replicas.begin() + 1, replicas.end(), gen); + return; + } + } + + std::shuffle(replicas.begin(), replicas.end(), gen); +} + +} + Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard) { if (from.addresses_with_failover.empty()) @@ -723,32 +750,9 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti } else { - // shuffle replicas so we don't always pick the same subset - std::random_device rd; - std::mt19937 gen{rd()}; auto shuffled_replicas = replicas; - - if (settings.prefer_localhost_replica) - { - // force for local replica to always be included - auto local_replica = std::find_if(shuffled_replicas.begin(), shuffled_replicas.end(), [](const auto & replica) { return replica.is_local; }); - if (local_replica != shuffled_replicas.end()) - { - if (local_replica != shuffled_replicas.begin()) - std::swap(*shuffled_replicas.begin(), *local_replica); - - std::shuffle(shuffled_replicas.begin() + 1, shuffled_replicas.end(), gen); - } - else - { - std::shuffle(shuffled_replicas.begin(), shuffled_replicas.end(), gen); - } - } - else - { - std::shuffle(shuffled_replicas.begin(), shuffled_replicas.end(), gen); - } - + // shuffle replicas so we don't always pick the same subset + shuffleReplicas(shuffled_replicas, settings); create_shards_from_replicas(std::span{shuffled_replicas.begin(), shuffled_replicas.begin() + max_replicas_from_shard}); } } From ad429a9312146bb48058af48283ff627cf094b5c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 19 Jan 2023 15:16:05 +0000 Subject: [PATCH 020/333] Add missing checks --- src/Client/HedgedConnections.cpp | 4 +++- src/Client/MultiplexedConnections.cpp | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index c7392a86a7e..12f1850c282 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -175,7 +175,9 @@ void HedgedConnections::sendQuery( modified_settings.group_by_two_level_threshold_bytes = 0; } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 + && (settings.parallel_replicas_mode != ParallelReplicasMode::READ_TASKS + || !settings.allow_experimental_parallel_reading_from_replicas); if (offset_states.size() > 1 && enable_sample_offset_parallel_processing) { diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 87eda765a7a..81bae7f501b 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -134,6 +134,7 @@ void 
MultiplexedConnections::sendQuery( } bool parallel_reading_from_replicas = settings.max_parallel_replicas > 1 + && settings.parallel_replicas_mode == ParallelReplicasMode::READ_TASKS && settings.allow_experimental_parallel_reading_from_replicas /// To avoid trying to coordinate with clickhouse-benchmark, /// since it uses the same code. @@ -146,7 +147,9 @@ void MultiplexedConnections::sendQuery( } } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 + && (settings.parallel_replicas_mode != ParallelReplicasMode::READ_TASKS + || !settings.allow_experimental_parallel_reading_from_replicas); size_t num_replicas = replica_states.size(); if (num_replicas > 1) From 9600d16e9f9983c8df4c79b985a97e389a832ace Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 20 Jan 2023 10:09:26 +0000 Subject: [PATCH 021/333] better tests for custom_key --- .../02404_memory_bound_merging.sql | 2 +- ...max_parallel_replicas_custom_key.reference | 13 - ...02527_max_parallel_replicas_custom_key.sql | 36 - ...max_parallel_replicas_custom_key.reference | 659 ++++++++++++++++++ .../02535_max_parallel_replicas_custom_key.sh | 47 ++ 5 files changed, 707 insertions(+), 50 deletions(-) delete mode 100644 tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.reference delete mode 100644 tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.sql create mode 100644 tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference create mode 100755 tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql index fb0c65e6a7c..f7db31dabb3 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.sql +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel +-- Tags: no-parallel, long create table t(a UInt64, b UInt64) engine=MergeTree order by a; system stop merges t; diff --git a/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.reference deleted file mode 100644 index c51a7e10fb2..00000000000 --- a/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.reference +++ /dev/null @@ -1,13 +0,0 @@ -Hello -1000 -1000 -1000 -1000 -1000 -1000 -1000 -1000 -1000 -1000 -1000 -1000 diff --git a/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.sql b/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.sql deleted file mode 100644 index b716600b9d5..00000000000 --- a/tests/queries/0_stateless/02527_max_parallel_replicas_custom_key.sql +++ /dev/null @@ -1,36 +0,0 @@ --- Tags: replica - -DROP TABLE IF EXISTS t; - -CREATE TABLE t (x String) ENGINE = MergeTree ORDER BY x; -INSERT INTO t VALUES ('Hello'); - -SET max_parallel_replicas = 3; -SET parallel_replicas_mode = 'custom_key'; - -SELECT * FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'sipHash64(x)'; - -DROP TABLE t; - -CREATE TABLE t (x String, y Int32) ENGINE = MergeTree ORDER BY cityHash64(x) SAMPLE BY cityHash64(x); -INSERT INTO t SELECT toString(number), number FROM numbers(1000); - -SET max_parallel_replicas = 1; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS 
parallel_replicas_custom_key = 'y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x)'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + 1'; - -SET max_parallel_replicas = 2; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x)'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + 1'; - -SET max_parallel_replicas = 3; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x)'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + 1'; - -DROP TABLE t; diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference new file mode 100644 index 00000000000..01f48778b34 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference @@ -0,0 +1,659 @@ +query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 prefer_localhost_replica=0 +Hello +(ReadFromRemote) +filter_type='default' max_replicas=2 prefer_localhost_replica=0 +Hello +(ReadFromRemote) +filter_type='default' max_replicas=3 prefer_localhost_replica=0 +Hello +(ReadFromRemote) +filter_type='range' max_replicas=1 prefer_localhost_replica=0 +Hello +(ReadFromRemote) +filter_type='range' max_replicas=2 prefer_localhost_replica=0 +Hello +(ReadFromRemote) +filter_type='range' max_replicas=3 prefer_localhost_replica=0 +Hello +(ReadFromRemote) +filter_type='default' max_replicas=1 prefer_localhost_replica=1 +Hello +(Expression) +ExpressionTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 +filter_type='default' max_replicas=2 prefer_localhost_replica=1 +Hello +(Union) + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=3 prefer_localhost_replica=1 +Hello +(Union) + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=1 prefer_localhost_replica=1 +Hello +(Expression) +ExpressionTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 +filter_type='range' max_replicas=2 prefer_localhost_replica=1 +Hello +(Union) + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=3 prefer_localhost_replica=1 +Hello +(Union) + 
(Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='cityHash64(x)' +filter_type='default' max_replicas=1 prefer_localhost_replica=0 +1000 +(ReadFromRemote) +filter_type='default' max_replicas=2 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=3 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=1 prefer_localhost_replica=0 +1000 +(ReadFromRemote) +filter_type='range' max_replicas=2 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=3 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=1 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform + (ReadFromStorage) + AggregatingTransform + ExpressionTransform + SourceFromSingleChunk 0 → 1 +filter_type='default' max_replicas=2 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=3 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=1 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform + (ReadFromStorage) + AggregatingTransform + ExpressionTransform + SourceFromSingleChunk 0 → 1 +filter_type='range' max_replicas=2 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=3 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 
→ 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='y' +filter_type='default' max_replicas=1 prefer_localhost_replica=0 +1000 +(ReadFromRemote) +filter_type='default' max_replicas=2 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=3 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=1 prefer_localhost_replica=0 +1000 +(ReadFromRemote) +filter_type='range' max_replicas=2 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=3 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=1 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform + (ReadFromStorage) + AggregatingTransform + ExpressionTransform + SourceFromSingleChunk 0 → 1 +filter_type='default' max_replicas=2 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=3 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=1 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform + (ReadFromStorage) + AggregatingTransform + ExpressionTransform + SourceFromSingleChunk 0 → 1 +filter_type='range' max_replicas=2 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=3 
prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='cityHash64(x) + y' +filter_type='default' max_replicas=1 prefer_localhost_replica=0 +1000 +(ReadFromRemote) +filter_type='default' max_replicas=2 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=3 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=1 prefer_localhost_replica=0 +1000 +(ReadFromRemote) +filter_type='range' max_replicas=2 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=3 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=1 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform + (ReadFromStorage) + AggregatingTransform + ExpressionTransform + SourceFromSingleChunk 0 → 1 +filter_type='default' max_replicas=2 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=3 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=1 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform + (ReadFromStorage) + AggregatingTransform + ExpressionTransform + SourceFromSingleChunk 0 → 1 +filter_type='range' max_replicas=2 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + 
ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=3 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='cityHash64(x) + 1' +filter_type='default' max_replicas=1 prefer_localhost_replica=0 +1000 +(ReadFromRemote) +filter_type='default' max_replicas=2 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=3 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=1 prefer_localhost_replica=0 +1000 +(ReadFromRemote) +filter_type='range' max_replicas=2 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=3 prefer_localhost_replica=0 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=1 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform + (ReadFromStorage) + AggregatingTransform + ExpressionTransform + SourceFromSingleChunk 0 → 1 +filter_type='default' max_replicas=2 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='default' max_replicas=3 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=1 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform + (ReadFromStorage) + AggregatingTransform + ExpressionTransform + SourceFromSingleChunk 0 → 1 +filter_type='range' max_replicas=2 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + 
MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 2 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) +filter_type='range' max_replicas=3 prefer_localhost_replica=1 +1000 +(Expression) +ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (Union) + (Aggregating) + AggregatingTransform + (Expression) + ExpressionTransform + (Filter) + FilterTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 + (ReadFromRemote) diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh new file mode 100755 index 00000000000..76138842e9a --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function run_with_custom_key { + echo "query='$1' with custom_key='$2'" + for prefer_localhost_replica in 0 1; do + for filter_type in 'default' 'range'; do + for max_replicas in {1..3}; do + echo "filter_type='$filter_type' max_replicas=$max_replicas prefer_localhost_replica=$prefer_localhost_replica" + query="$1 SETTINGS max_parallel_replicas=$max_replicas\ + , parallel_replicas_mode='custom_key'\ + , parallel_replicas_custom_key='$2'\ + , parallel_replicas_custom_key_filter_type='$filter_type'\ + , max_threads=4\ + , prefer_localhost_replica=$prefer_localhost_replica" + $CLICKHOUSE_CLIENT --query="$query" + $CLICKHOUSE_CLIENT --query="EXPLAIN PIPELINE $query" + done + done + done +} + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String) ENGINE = MergeTree ORDER BY x"; +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key VALUES ('Hello')"; + +run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)" "sipHash64(x)" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String, y Int32) ENGINE = MergeTree ORDER BY cityHash64(x)" +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key SELECT toString(number), number FROM numbers(1000)" + +function run_count_with_custom_key { + run_with_custom_key "SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)" "$1" +} + +run_count_with_custom_key "cityHash64(x)" +run_count_with_custom_key "y" +run_count_with_custom_key "cityHash64(x) + y" +run_count_with_custom_key "cityHash64(x) + 1" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" From 2cb106f1f9f5e893431e776c66c0b26c689a560a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 20 Jan 2023 12:55:26 +0000 Subject: [PATCH 022/333] Add integration test for custom_key --- .../__init__.py | 0 .../configs/remote_servers.xml | 50 ++++++++++ .../test_parallel_replicas_custom_key/test.py | 94 +++++++++++++++++++ 3 files changed, 144 insertions(+) create mode 100644 tests/integration/test_parallel_replicas_custom_key/__init__.py create mode 100644 tests/integration/test_parallel_replicas_custom_key/configs/remote_servers.xml create mode 100644 
tests/integration/test_parallel_replicas_custom_key/test.py diff --git a/tests/integration/test_parallel_replicas_custom_key/__init__.py b/tests/integration/test_parallel_replicas_custom_key/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_parallel_replicas_custom_key/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_custom_key/configs/remote_servers.xml new file mode 100644 index 00000000000..308db461498 --- /dev/null +++ b/tests/integration/test_parallel_replicas_custom_key/configs/remote_servers.xml @@ -0,0 +1,50 @@ + + + + + false + + n1 + 9000 + + + n2 + 9000 + + + + false + + n3 + 9000 + + + n4 + 9000 + + + + + + false + + n1 + 9000 + + + n2 + 9000 + + + n3 + 9000 + + + n4 + 9000 + + + + + + diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py new file mode 100644 index 00000000000..0787234a7b1 --- /dev/null +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -0,0 +1,94 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +n1 = cluster.add_instance( + "n1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +n2 = cluster.add_instance( + "n2", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +n3 = cluster.add_instance( + "n3", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +n4 = cluster.add_instance( + "n4", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +nodes = [n1, n2, n3, n4] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def create_tables(cluster): + n1.query("DROP TABLE IF EXISTS dist_table") + n1.query(f"DROP TABLE IF EXISTS test_table ON CLUSTER {cluster}") + + n1.query( + f"CREATE TABLE test_table ON CLUSTER {cluster} (key Int32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" + ) + n1.query( + f""" + CREATE TABLE dist_table AS test_table + Engine=Distributed( + {cluster}, + currentDatabase(), + test_table, + rand() + ) + """ + ) + + +def insert_data(cluster, row_num): + create_tables(cluster) + n1.query(f"INSERT INTO dist_table SELECT number, number FROM numbers({row_num})") + n1.query("SYSTEM FLUSH DISTRIBUTED dist_table") + + +@pytest.mark.parametrize("custom_key", ["sipHash64(value)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize( + "cluster", + ["test_multiple_shards_multiple_replicas", "test_single_shard_multiple_replicas"], +) +def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter_type): + for node in nodes: + node.rotate_logs() + + row_num = 1000 + insert_data(cluster, row_num) + assert ( + int( + n1.query( + "SELECT count() FROM dist_table", + settings={ + "prefer_localhost_replica": 0, + "max_parallel_replicas": 3, + "parallel_replicas_mode": "custom_key", + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + }, + ) + ) + == row_num + ) + + if cluster == "test_multiple_shards_multiple_replicas": + # we simply process query on all replicas for each shard by appending the filter on replica + assert all( + node.contains_in_log("Processing query on a replica using custom_key") + for node in nodes + ) + else: + # we first transform all replicas into shards and then append for each shard filter + assert n1.contains_in_log( + 
"Single shard cluster used with custom_key, transforming replicas into virtual shards" + ) From ec149c4585c5a7655a275126be59b1694aeafd22 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 20 Jan 2023 12:56:20 +0000 Subject: [PATCH 023/333] Remove old test --- ...rallel_replicas_custom_key_range.reference | 13 ------- ...max_parallel_replicas_custom_key_range.sql | 37 ------------------- 2 files changed, 50 deletions(-) delete mode 100644 tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.reference delete mode 100644 tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql diff --git a/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.reference b/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.reference deleted file mode 100644 index c51a7e10fb2..00000000000 --- a/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.reference +++ /dev/null @@ -1,13 +0,0 @@ -Hello -1000 -1000 -1000 -1000 -1000 -1000 -1000 -1000 -1000 -1000 -1000 -1000 diff --git a/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql b/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql deleted file mode 100644 index a8ef9e58d40..00000000000 --- a/tests/queries/0_stateless/02528_max_parallel_replicas_custom_key_range.sql +++ /dev/null @@ -1,37 +0,0 @@ --- Tags: replica - -DROP TABLE IF EXISTS t; - -CREATE TABLE t (x String) ENGINE = MergeTree ORDER BY x; -INSERT INTO t VALUES ('Hello'); - -SET max_parallel_replicas = 3; -SET parallel_replicas_mode = 'custom_key'; -SET parallel_replicas_custom_key_filter_type = 'range'; - -SELECT * FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'sipHash64(x)'; - -DROP TABLE t; - -CREATE TABLE t (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x) SAMPLE BY cityHash64(x); -INSERT INTO t SELECT toString(number), number FROM numbers(1000); - -SET max_parallel_replicas = 1; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x)'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + 1'; - -SET max_parallel_replicas = 2; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x)'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + 1'; - -SET max_parallel_replicas = 3; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x) + y'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 'cityHash64(x)'; -SELECT count() FROM remote('127.0.0.{2|3|4}', currentDatabase(), t) SETTINGS parallel_replicas_custom_key = 
'cityHash64(x) + 1'; - -DROP TABLE t; From 7bfaf88666a28d76d47a5ba4e3bd2b2925ce90fb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 20 Jan 2023 16:35:14 +0000 Subject: [PATCH 024/333] Fix stateless tests --- .../0_stateless/00124_shard_distributed_with_many_replicas.sql | 1 + .../01034_prewhere_max_parallel_replicas_distributed.sql | 3 +++ tests/queries/0_stateless/01034_sample_final_distributed.sql | 3 +++ .../0_stateless/01099_parallel_distributed_insert_select.sql | 1 + tests/queries/0_stateless/01517_select_final_distributed.sql | 2 ++ .../0_stateless/01557_max_parallel_replicas_no_sample.sql | 1 + .../0_stateless/02535_max_parallel_replicas_custom_key.sh | 1 + 7 files changed, 12 insertions(+) diff --git a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql index ab4c433ba47..795551e5dfa 100644 --- a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql +++ b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql @@ -1,6 +1,7 @@ -- Tags: replica, distributed SET max_parallel_replicas = 2; +SET parallel_replicas_mode = 'sample_key'; DROP TABLE IF EXISTS report; set allow_deprecated_syntax_for_merge_tree=1; diff --git a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql index b26c9af14a9..96d52d7e60e 100644 --- a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql +++ b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql @@ -9,6 +9,9 @@ CREATE TABLE test_max_parallel_replicas_lr (timestamp UInt64) ENGINE = MergeTree INSERT INTO test_max_parallel_replicas_lr select number as timestamp from system.numbers limit 100; SET max_parallel_replicas = 2; +SET parallel_replicas_mode = 'sample_key'; +SET allow_experimental_parallel_reading_from_replicas = 0; + select count() FROM remote('127.0.0.{2|3}', currentDatabase(), test_max_parallel_replicas_lr) PREWHERE timestamp > 0; drop table test_max_parallel_replicas_lr; diff --git a/tests/queries/0_stateless/01034_sample_final_distributed.sql b/tests/queries/0_stateless/01034_sample_final_distributed.sql index b784b35cbb3..1b1832f7a0d 100644 --- a/tests/queries/0_stateless/01034_sample_final_distributed.sql +++ b/tests/queries/0_stateless/01034_sample_final_distributed.sql @@ -13,7 +13,10 @@ select count() from sample_final sample 1/2; select 'count sample final'; select count() from sample_final final sample 1/2; select 'count final max_parallel_replicas'; + +set allow_experimental_parallel_reading_from_replicas = 0; set max_parallel_replicas=2; +set parallel_replicas_mode='sample_key'; select count() from remote('127.0.0.{2|3}', currentDatabase(), sample_final) final; drop table if exists sample_final; diff --git a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql index a75a5b2c33d..aa924218360 100644 --- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql +++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql @@ -2,6 +2,7 @@ -- set insert_distributed_sync = 1; -- see https://github.com/ClickHouse/ClickHouse/issues/18971 +SET allow_experimental_parallel_reading_from_replicas = 0; -- see https://github.com/ClickHouse/ClickHouse/issues/34525 SET prefer_localhost_replica = 1; DROP TABLE IF EXISTS local_01099_a; diff 
--git a/tests/queries/0_stateless/01517_select_final_distributed.sql b/tests/queries/0_stateless/01517_select_final_distributed.sql index a3d1fcfc185..701828b0b38 100644 --- a/tests/queries/0_stateless/01517_select_final_distributed.sql +++ b/tests/queries/0_stateless/01517_select_final_distributed.sql @@ -1,5 +1,7 @@ -- Tags: distributed +SET allow_experimental_parallel_reading_from_replicas = 0; + DROP TABLE IF EXISTS test5346; CREATE TABLE test5346 (`Id` String, `Timestamp` DateTime, `updated` DateTime) diff --git a/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql b/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql index 2b1a66147a4..c44c335700f 100644 --- a/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql +++ b/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql @@ -4,6 +4,7 @@ DROP TABLE IF EXISTS t; CREATE TABLE t (x String) ENGINE = MergeTree ORDER BY x; INSERT INTO t VALUES ('Hello'); +SET parallel_replicas_mode = 'sample_key'; SET max_parallel_replicas = 3; SELECT * FROM remote('127.0.0.{2|3|4}', currentDatabase(), t); diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh index 76138842e9a..e97643bd366 100755 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-parallel, long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 40f42aa0789b6e501e7e13ce391896317778a316 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Sun, 22 Jan 2023 14:15:21 +0000 Subject: [PATCH 025/333] Remove pipeline from test --- ...max_parallel_replicas_custom_key.reference | 534 ------------------ .../02535_max_parallel_replicas_custom_key.sh | 2 - 2 files changed, 536 deletions(-) diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference index 01f48778b34..21ed26b7579 100644 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference @@ -1,659 +1,125 @@ query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='sipHash64(x)' filter_type='default' max_replicas=1 prefer_localhost_replica=0 Hello -(ReadFromRemote) filter_type='default' max_replicas=2 prefer_localhost_replica=0 Hello -(ReadFromRemote) filter_type='default' max_replicas=3 prefer_localhost_replica=0 Hello -(ReadFromRemote) filter_type='range' max_replicas=1 prefer_localhost_replica=0 Hello -(ReadFromRemote) filter_type='range' max_replicas=2 prefer_localhost_replica=0 Hello -(ReadFromRemote) filter_type='range' max_replicas=3 prefer_localhost_replica=0 Hello -(ReadFromRemote) filter_type='default' max_replicas=1 prefer_localhost_replica=1 Hello -(Expression) -ExpressionTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 filter_type='default' max_replicas=2 prefer_localhost_replica=1 Hello -(Union) - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='default' max_replicas=3 prefer_localhost_replica=1 Hello -(Union) - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 
0 → 1 - (ReadFromRemote) filter_type='range' max_replicas=1 prefer_localhost_replica=1 Hello -(Expression) -ExpressionTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 filter_type='range' max_replicas=2 prefer_localhost_replica=1 Hello -(Union) - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='range' max_replicas=3 prefer_localhost_replica=1 Hello -(Union) - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='cityHash64(x)' filter_type='default' max_replicas=1 prefer_localhost_replica=0 1000 -(ReadFromRemote) filter_type='default' max_replicas=2 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (ReadFromRemote) filter_type='default' max_replicas=3 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (ReadFromRemote) filter_type='range' max_replicas=1 prefer_localhost_replica=0 1000 -(ReadFromRemote) filter_type='range' max_replicas=2 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (ReadFromRemote) filter_type='range' max_replicas=3 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (ReadFromRemote) filter_type='default' max_replicas=1 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform - (ReadFromStorage) - AggregatingTransform - ExpressionTransform - SourceFromSingleChunk 0 → 1 filter_type='default' max_replicas=2 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='default' max_replicas=3 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='range' max_replicas=1 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform - (ReadFromStorage) - AggregatingTransform - ExpressionTransform - SourceFromSingleChunk 0 → 1 filter_type='range' max_replicas=2 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - 
MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='range' max_replicas=3 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='y' filter_type='default' max_replicas=1 prefer_localhost_replica=0 1000 -(ReadFromRemote) filter_type='default' max_replicas=2 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (ReadFromRemote) filter_type='default' max_replicas=3 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (ReadFromRemote) filter_type='range' max_replicas=1 prefer_localhost_replica=0 1000 -(ReadFromRemote) filter_type='range' max_replicas=2 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (ReadFromRemote) filter_type='range' max_replicas=3 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (ReadFromRemote) filter_type='default' max_replicas=1 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform - (ReadFromStorage) - AggregatingTransform - ExpressionTransform - SourceFromSingleChunk 0 → 1 filter_type='default' max_replicas=2 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='default' max_replicas=3 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='range' max_replicas=1 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform - (ReadFromStorage) - AggregatingTransform - ExpressionTransform - SourceFromSingleChunk 0 → 1 filter_type='range' max_replicas=2 prefer_localhost_replica=1 1000 
-(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='range' max_replicas=3 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='cityHash64(x) + y' filter_type='default' max_replicas=1 prefer_localhost_replica=0 1000 -(ReadFromRemote) filter_type='default' max_replicas=2 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (ReadFromRemote) filter_type='default' max_replicas=3 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (ReadFromRemote) filter_type='range' max_replicas=1 prefer_localhost_replica=0 1000 -(ReadFromRemote) filter_type='range' max_replicas=2 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (ReadFromRemote) filter_type='range' max_replicas=3 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (ReadFromRemote) filter_type='default' max_replicas=1 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform - (ReadFromStorage) - AggregatingTransform - ExpressionTransform - SourceFromSingleChunk 0 → 1 filter_type='default' max_replicas=2 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='default' max_replicas=3 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='range' max_replicas=1 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform - (ReadFromStorage) - 
AggregatingTransform - ExpressionTransform - SourceFromSingleChunk 0 → 1 filter_type='range' max_replicas=2 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='range' max_replicas=3 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='cityHash64(x) + 1' filter_type='default' max_replicas=1 prefer_localhost_replica=0 1000 -(ReadFromRemote) filter_type='default' max_replicas=2 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (ReadFromRemote) filter_type='default' max_replicas=3 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (ReadFromRemote) filter_type='range' max_replicas=1 prefer_localhost_replica=0 1000 -(ReadFromRemote) filter_type='range' max_replicas=2 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (ReadFromRemote) filter_type='range' max_replicas=3 prefer_localhost_replica=0 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (ReadFromRemote) filter_type='default' max_replicas=1 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform - (ReadFromStorage) - AggregatingTransform - ExpressionTransform - SourceFromSingleChunk 0 → 1 filter_type='default' max_replicas=2 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='default' max_replicas=3 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) 
filter_type='range' max_replicas=1 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform - (ReadFromStorage) - AggregatingTransform - ExpressionTransform - SourceFromSingleChunk 0 → 1 filter_type='range' max_replicas=2 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 2 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) filter_type='range' max_replicas=3 prefer_localhost_replica=1 1000 -(Expression) -ExpressionTransform × 4 - (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (Union) - (Aggregating) - AggregatingTransform - (Expression) - ExpressionTransform - (Filter) - FilterTransform - (ReadFromMergeTree) - MergeTreeInOrder 0 → 1 - (ReadFromRemote) diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh index e97643bd366..40f62d39769 100755 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh @@ -15,10 +15,8 @@ function run_with_custom_key { , parallel_replicas_mode='custom_key'\ , parallel_replicas_custom_key='$2'\ , parallel_replicas_custom_key_filter_type='$filter_type'\ - , max_threads=4\ , prefer_localhost_replica=$prefer_localhost_replica" $CLICKHOUSE_CLIENT --query="$query" - $CLICKHOUSE_CLIENT --query="EXPLAIN PIPELINE $query" done done done From 3a0941573376d4e2001b9c5937715a2a021a483a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Sun, 22 Jan 2023 14:20:25 +0000 Subject: [PATCH 026/333] Refactor integration test --- .../test_parallel_replicas_custom_key/test.py | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index 0787234a7b1..0819aceb8c7 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -3,19 +3,12 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -n1 = cluster.add_instance( - "n1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True -) -n2 = cluster.add_instance( - "n2", main_configs=["configs/remote_servers.xml"], with_zookeeper=True -) -n3 = cluster.add_instance( - "n3", main_configs=["configs/remote_servers.xml"], with_zookeeper=True -) -n4 = cluster.add_instance( - "n4", main_configs=["configs/remote_servers.xml"], with_zookeeper=True -) -nodes = [n1, n2, n3, n4] +nodes = [ + cluster.add_instance( + f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True + ) + for i in range(1, 5) +] @pytest.fixture(scope="module", autouse=True) @@ -28,6 +21,7 @@ def start_cluster(): def create_tables(cluster): + n1 = nodes[0] n1.query("DROP TABLE IF EXISTS dist_table") n1.query(f"DROP TABLE IF EXISTS test_table ON CLUSTER {cluster}") @@ -49,6 +43,7 @@ def create_tables(cluster): def insert_data(cluster, row_num): create_tables(cluster) + n1 = nodes[0] n1.query(f"INSERT INTO dist_table SELECT number, number FROM numbers({row_num})") n1.query("SYSTEM 
FLUSH DISTRIBUTED dist_table") @@ -65,6 +60,8 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter row_num = 1000 insert_data(cluster, row_num) + + n1 = nodes[0] assert ( int( n1.query( From c15128491bd3fc451abd07807e9ed95cee335710 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 23 Jan 2023 17:24:48 +0100 Subject: [PATCH 027/333] Update max_parallel_replicas docs --- docs/en/operations/settings/settings.md | 54 ++++++++++++++++++++++++- docs/en/sql-reference/operators/in.md | 13 ++++-- 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e3f7bc11ddf..9fd4ccafb18 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1221,7 +1221,9 @@ Possible values: Default value: 1. :::warning -Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas). +Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) with [parallel_replicas_mode](#settings-parallel_replicas_mode) set to `sample_key` or `read_tasks`. +If [parallel_replicas_mode](#settings-parallel_replicas_mode) is set to `custom_key`, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas. +If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects. ::: ## totals_mode {#totals-mode} @@ -1246,17 +1248,65 @@ Default value: `1`. **Additional Info** -This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases: +This options will produce different results depending on the value of [parallel_replicas_mode](#settings-parallel_replicas_mode). + +### `sample_key` + +If [parallel_replicas_mode](#settings-parallel_replicas_mode) is set to `sample_key`, this setting is useful for replicated tables with a sampling key. +A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases: - The position of the sampling key in the partitioning key does not allow efficient range scans. - Adding a sampling key to the table makes filtering by other columns less efficient. - The sampling key is an expression that is expensive to calculate. - The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency. +### `custom_key` + +If [parallel_replicas_mode](#settings-parallel_replicas_mode) is set to `custom_key`, this setting is useful for any replicated table. +A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) +and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type). + +Use `default` for [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type) unless the data is split across the entire integer space (e.g. column contains hash values), +then `range` should be used. +Simple expressions using primary keys are preferred. + +If the `custom_key` mode is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards. 
+Otherwise, it will behave the same as `sample_key` mode: it will use multiple replicas of each shard. + :::warning This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details. ::: +## parallel_replicas_mode {#settings-parallel_replicas_mode} + +Mode of splitting work between replicas. + +Possible values: + +- `sample_key` — Use `SAMPLE` key defined in the `SAMPLE BY` clause to split the work between replicas. +- `custom_key` — Define an arbitrary integer expression to use for splitting work between replicas. +- `read_tasks` — Split tasks for reading physical parts between replicas. + +Default value: `sample_key`. + +## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key} + +Arbitrary integer expression that will be used to split work between replicas. +Used only if `parallel_replicas_mode` is set to `custom_key`. + +Default value: `''`. + +## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type} + +How to use `parallel_replicas_custom_key` expression for splitting work between replicas. + +Possible values: + +- `default` — Use the default implementation using modulo operation on the `parallel_replicas_custom_key`. +- `range` — Split the entire value space of the expression into ranges. This type of filtering is useful if values of `parallel_replicas_custom_key` are uniformly spread across the entire integer space, e.g. hash values. + +Default value: `default`. + ## compile_expressions {#compile-expressions} Enables or disables compilation of frequently used simple functions and operators to native code with LLVM at runtime. diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index 58119cfc4f5..e1e4118524a 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -233,11 +233,12 @@ If `some_predicate` is not selective enough, it will return large amount of data ### Distributed Subqueries and max_parallel_replicas -When max_parallel_replicas is greater than 1, distributed queries are further transformed. For example, the following: +When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed. +For example, if [parallel_replicas_mode](#settings-parallel_replicas_mode) is set to `sample_key`, the following: ```sql SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100) -SETTINGS max_parallel_replicas=3 +SETTINGS max_parallel_replicas=3, parallel_replicas_mode='sample_key' ``` is transformed on each server into ```sql SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100) SETTINGS parallel_replicas_count=3, parallel_replicas_offset=M ``` -where M is between 1 and 3 depending on which replica the local query is executing on. These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table. +where M is between 1 and 3 depending on which replica the local query is executing on. -Therefore adding the max_parallel_replicas setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it.
In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN. +These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table. + +Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN. One workaround if local_table_2 does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`. + +If a table doesn't have a sampling key, more flexible options for [parallel_replicas_mode](#settings-parallel_replicas_mode) can be used that can produce different and more optimal behaviour. From 37b62b3a589d0dd57dcfc72a5043a2608b0c2f20 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 24 Jan 2023 10:46:47 +0000 Subject: [PATCH 028/333] Use Map for custom_key --- docs/en/operations/settings/settings.md | 8 +- src/Core/Settings.h | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 24 +++--- .../getCustomKeyFilterForParallelReplicas.cpp | 44 +++++++++-- .../getCustomKeyFilterForParallelReplicas.h | 5 +- src/Storages/StorageDistributed.cpp | 78 ++++++++++++++----- .../test_parallel_replicas_custom_key/test.py | 46 ++++++++++- .../02535_max_parallel_replicas_custom_key.sh | 2 +- 8 files changed, 166 insertions(+), 43 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 9fd4ccafb18..68690e8a173 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1291,10 +1291,14 @@ Default value: `sample_key`. ## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key} -Arbitrary integer expression that will be used to split work between replicas. +Map of arbitrary integer expression that can be used to split work between replicas for a specific table. +If it's used with `cluster` function, the key can be name of the local table defined inside the `cluster` function. +If it's used with `Distributed` engine, the key can be name of the distributed table, alias or the local table for which the `Distributed` engine is created. +The value can be any integer expression. + Used only if `parallel_replicas_mode` is set to `custom_key`. -Default value: `''`. +Default value: `{}`. ## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 89686c92960..2c8b1c90faa 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -154,7 +154,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \ M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. 
This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ - M(String, parallel_replicas_custom_key, "", "Custom key for parallel replicas using modulo operation on the key for assigning work to replicas.", 0) \ + M(Map, parallel_replicas_custom_key, "", "Custom key for parallel replicas using modulo operation on the key for assigning work to replicas.", 0) \ M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ M(ParallelReplicasMode, parallel_replicas_mode, ParallelReplicasMode::SAMPLE_KEY, "How to process query using multiple replicas.", 0) \ \ diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 817d40c086a..d174912f66d 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -513,18 +513,20 @@ InterpreterSelectQuery::InterpreterSelectQuery( ASTPtr parallel_replicas_custom_filter_ast = nullptr; if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) { - LOG_INFO(log, "Processing query on a replica using custom_key"); - if (!storage) - throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Storage is unknown when trying to parse custom key for parallel replica"); + if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, joined_tables.tablesWithColumns().front().table, *context)) + { + LOG_INFO(log, "Processing query on a replica using custom_key"); + if (!storage) + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Storage is unknown when trying to parse custom key for parallel replica"); - auto custom_key_ast = parseParallelReplicaCustomKey(settings.parallel_replicas_custom_key.value, *context); - parallel_replicas_custom_filter_ast = getCustomKeyFilterForParallelReplica( - settings.parallel_replicas_count, - settings.parallel_replica_offset, - std::move(custom_key_ast), - settings.parallel_replicas_custom_key_filter_type, - *storage, - context); + parallel_replicas_custom_filter_ast = getCustomKeyFilterForParallelReplica( + settings.parallel_replicas_count, + settings.parallel_replica_offset, + std::move(custom_key_ast), + settings.parallel_replicas_custom_key_filter_type, + *storage, + context); + } } auto analyze = [&](bool try_move_to_prewhere) diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index fbc1d214164..cfc7cfd6194 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -116,15 +116,43 @@ ASTPtr getCustomKeyFilterForParallelReplica( return makeASTFunction("and", std::move(lower_function), std::move(upper_function)); } -ASTPtr parseParallelReplicaCustomKey(std::string_view custom_key, 
const Context & context) +ASTPtr parseCustomKeyForTable(const Map & custom_keys, const DatabaseAndTableWithAlias & target, const Context & context) { - if (custom_key.empty()) - throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Parallel replicas mode set to 'custom_key' but 'parallel_replicas_custom_key' has no value"); + for (size_t i = 0; i < custom_keys.size(); ++i) + { + const auto & tuple = custom_keys[i].safeGet(); + auto & table = tuple.at(0).safeGet(); + auto & filter = tuple.at(1).safeGet(); - ParserExpression parser; - const auto & settings = context.getSettingsRef(); - return parseQuery( - parser, custom_key.data(), custom_key.data() + custom_key.size(), - "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); + if (table == target.alias || + (table == target.table && context.getCurrentDatabase() == target.database) || + (table == target.database + '.' + target.table)) + { + /// Try to parse expression + ParserExpression parser; + const auto & settings = context.getSettingsRef(); + return parseQuery( + parser, filter.data(), filter.data() + filter.size(), + "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); + } + } + + return nullptr; } + +bool containsCustomKeyForTable(const Map & custom_keys, const DatabaseAndTableWithAlias & target, const Context & context) +{ + for (size_t i = 0; i < custom_keys.size(); ++i) + { + const auto & tuple = custom_keys[i].safeGet(); + auto & table = tuple.at(0).safeGet(); + + if (table == target.alias || + (table == target.table && context.getCurrentDatabase() == target.database) || + (table == target.database + '.' + target.table)) + return true; + } + return false; +} + } diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h index 251e72e0a97..fc515132487 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -18,6 +19,8 @@ ASTPtr getCustomKeyFilterForParallelReplica( const IStorage & storage, const ContextPtr & context); -ASTPtr parseParallelReplicaCustomKey(std::string_view custom_key, const Context & context); +ASTPtr parseCustomKeyForTable(const Map & custom_keys, const DatabaseAndTableWithAlias & target, const Context & context); + +bool containsCustomKeyForTable(const Map & custom_keys, const DatabaseAndTableWithAlias & target, const Context & context); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index bf9ae2fdd20..7a3bd89e5ff 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -27,7 +27,6 @@ #include #include #include -#include "Core/SettingsEnums.h" #include #include @@ -47,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +80,7 @@ #include #include +#include #include #include @@ -91,8 +92,6 @@ #include #include -#include - namespace fs = std::filesystem; @@ -307,13 +306,13 @@ size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & clus return (num_remote_shards + num_local_shards) * settings.max_parallel_replicas; } -bool useVirtualShards(const Settings & settings, const Cluster & cluster) +bool canUseCustomKey(const Settings & settings, const Cluster & cluster) { return settings.max_parallel_replicas > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY && cluster.getShardCount() == 1 && 
cluster.getShardsInfo()[0].getAllNodeCount() > 1; } -} +} /// For destruction of std::unique_ptr of type that is incomplete in class definition. StorageDistributed::~StorageDistributed() = default; @@ -453,7 +452,33 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( size_t nodes = getClusterQueriedNodes(settings, cluster); - if (useVirtualShards(settings, *cluster)) + const auto use_virtual_shards = [&] + { + if (!canUseCustomKey(settings, *cluster)) + return false; + + auto distributed_table = DatabaseAndTableWithAlias( + *getTableExpression(query_info.query->as(), 0), local_context->getCurrentDatabase()); + + if (containsCustomKeyForTable(settings.parallel_replicas_custom_key, distributed_table, *local_context)) + { + LOG_INFO(log, "Found custom_key for {}", distributed_table.getQualifiedNamePrefix(false)); + return true; + } + + DatabaseAndTableWithAlias remote_table_info; + remote_table_info.database = remote_database; + remote_table_info.table = remote_table; + if (containsCustomKeyForTable(settings.parallel_replicas_custom_key, remote_table_info, *local_context)) + { + LOG_INFO(log, "Found custom_key for {}", remote_table_info.getQualifiedNamePrefix(false)); + return true; + } + + return false; + }; + + if (use_virtual_shards()) { LOG_INFO(log, "Single shard cluster used with custom_key, transforming replicas into virtual shards"); @@ -767,23 +792,40 @@ void StorageDistributed::read( auto settings = local_context->getSettingsRef(); ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; - if (useVirtualShards(settings, *getCluster())) + if (canUseCustomKey(settings, *getCluster())) { - if (query_info.getCluster()->getShardCount() == 1) + const auto get_custom_key_ast = [&]() -> ASTPtr { - // we are reading from single shard with multiple replicas but didn't transform replicas - // into virtual shards with custom_key set - throw Exception(ErrorCodes::LOGICAL_ERROR, "Replicas weren't transformed into virtual shards"); - } + auto distributed_table = DatabaseAndTableWithAlias( + *getTableExpression(query_info.query->as(), 0), local_context->getCurrentDatabase()); + if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, distributed_table, *local_context)) + return custom_key_ast; - auto custom_key_ast = parseParallelReplicaCustomKey(settings.parallel_replicas_custom_key.value, *local_context); + DatabaseAndTableWithAlias remote_table_info; + remote_table_info.database = remote_database; + remote_table_info.table = remote_table; + if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, remote_table_info, *local_context)) + return custom_key_ast; - additional_shard_filter_generator = - [&, custom_key_ast = std::move(custom_key_ast), shard_count = query_info.cluster->getShardCount()](uint64_t shard_num) -> ASTPtr - { - return getCustomKeyFilterForParallelReplica( - shard_count, shard_num - 1, custom_key_ast, settings.parallel_replicas_custom_key_filter_type, *this, local_context); + return nullptr; }; + + if (auto custom_key_ast = get_custom_key_ast()) + { + if (query_info.getCluster()->getShardCount() == 1) + { + // we are reading from single shard with multiple replicas but didn't transform replicas + // into virtual shards with custom_key set + throw Exception(ErrorCodes::LOGICAL_ERROR, "Replicas weren't transformed into virtual shards"); + } + + additional_shard_filter_generator = + [&, custom_key_ast = std::move(custom_key_ast), shard_count = 
query_info.cluster->getShardCount()](uint64_t shard_num) -> ASTPtr + { + return getCustomKeyFilterForParallelReplica( + shard_count, shard_num - 1, custom_key_ast, settings.parallel_replicas_custom_key_filter_type, *this, local_context); + }; + } } bool parallel_replicas = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index 0819aceb8c7..c11be610611 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -70,7 +70,7 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter "prefer_localhost_replica": 0, "max_parallel_replicas": 3, "parallel_replicas_mode": "custom_key", - "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key": f"{{'test_table': '{custom_key}'}}", "parallel_replicas_custom_key_filter_type": filter_type, }, ) @@ -89,3 +89,47 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter assert n1.contains_in_log( "Single shard cluster used with custom_key, transforming replicas into virtual shards" ) + + +def test_custom_key_different_table_names(start_cluster): + def run(table_source, table_name_for_custom_key): + for node in nodes: + node.rotate_logs() + + row_num = 1000 + insert_data("test_single_shard_multiple_replicas", row_num) + + n1 = nodes[0] + assert ( + int( + n1.query( + f"SELECT count() FROM {table_source}", + settings={ + "prefer_localhost_replica": 0, + "max_parallel_replicas": 3, + "parallel_replicas_mode": "custom_key", + "parallel_replicas_custom_key": f"{{'{table_name_for_custom_key}': 'sipHash64(value)'}}", + }, + ) + ) + == row_num + ) + + # we first transform all replicas into shards and then append for each shard filter + assert n1.contains_in_log( + "Single shard cluster used with custom_key, transforming replicas into virtual shards" + ) + + run("dist_table", "dist_table") + run("dist_table as d", "d") + run("dist_table as d", "dist_table") + run("dist_table as d", "test_table") + run( + "cluster('test_single_shard_multiple_replicas', default.test_table)", + "test_table", + ) + run("cluster('test_single_shard_multiple_replicas', default.test_table) as d", "d") + run( + "cluster('test_single_shard_multiple_replicas', default.test_table) as d", + "test_table", + ) diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh index 40f62d39769..b6e5c853dc2 100755 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh @@ -13,7 +13,7 @@ function run_with_custom_key { echo "filter_type='$filter_type' max_replicas=$max_replicas prefer_localhost_replica=$prefer_localhost_replica" query="$1 SETTINGS max_parallel_replicas=$max_replicas\ , parallel_replicas_mode='custom_key'\ - , parallel_replicas_custom_key='$2'\ + , parallel_replicas_custom_key={'02535_custom_key': '$2'}\ , parallel_replicas_custom_key_filter_type='$filter_type'\ , prefer_localhost_replica=$prefer_localhost_replica" $CLICKHOUSE_CLIENT --query="$query" From 7df545fbe3af5c6ea688c8ee0a06d1004c6fa349 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 24 Jan 2023 11:21:09 +0000 Subject: [PATCH 029/333] Remove unused define --- 
src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index cfc7cfd6194..229668ceff4 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -18,7 +18,6 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; - extern const int BAD_ARGUMENTS; } ASTPtr getCustomKeyFilterForParallelReplica( From ffa3d1d603d0e77b7cf0fa71a70ac3f53e7671ff Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 24 Jan 2023 13:58:42 +0000 Subject: [PATCH 030/333] Run query on 1 replica if no custom_key --- src/Interpreters/InterpreterSelectQuery.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 996efe2b4cf..9cecd53f6c8 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -511,7 +511,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); ASTPtr parallel_replicas_custom_filter_ast = nullptr; - if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY) + if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY && !joined_tables.tablesWithColumns().empty()) { if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, joined_tables.tablesWithColumns().front().table, *context)) { @@ -527,6 +527,14 @@ InterpreterSelectQuery::InterpreterSelectQuery( *storage, context); } + else if (settings.parallel_replica_offset > 0) + { + LOG_DEBUG( + log, + "Will use no data on this replica because parallel replicas processing with custom_key has been requested" + " (setting 'max_parallel_replicas') but the table does not have custom_key defined for it (settings `parallel_replicas_custom_key`)"); + parallel_replicas_custom_filter_ast = std::make_shared(false); + } } auto analyze = [&](bool try_move_to_prewhere) From 6da9dc2407cd11aa3347f64d3869a0ccfaac48d0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 24 Jan 2023 14:16:26 +0000 Subject: [PATCH 031/333] Correctly handle case with missing custom_key --- .../test_parallel_replicas_custom_key/test.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index c11be610611..9222f417a94 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -68,7 +68,7 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter "SELECT count() FROM dist_table", settings={ "prefer_localhost_replica": 0, - "max_parallel_replicas": 3, + "max_parallel_replicas": 4, "parallel_replicas_mode": "custom_key", "parallel_replicas_custom_key": f"{{'test_table': '{custom_key}'}}", "parallel_replicas_custom_key_filter_type": filter_type, @@ -92,7 +92,7 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter def test_custom_key_different_table_names(start_cluster): - def run(table_source, table_name_for_custom_key): + def 
run(table_source, table_name_for_custom_key, should_use_virtual_shard=True): for node in nodes: node.rotate_logs() @@ -106,7 +106,7 @@ def test_custom_key_different_table_names(start_cluster): f"SELECT count() FROM {table_source}", settings={ "prefer_localhost_replica": 0, - "max_parallel_replicas": 3, + "max_parallel_replicas": 4, "parallel_replicas_mode": "custom_key", "parallel_replicas_custom_key": f"{{'{table_name_for_custom_key}': 'sipHash64(value)'}}", }, @@ -116,7 +116,7 @@ def test_custom_key_different_table_names(start_cluster): ) # we first transform all replicas into shards and then append for each shard filter - assert n1.contains_in_log( + assert not should_use_virtual_shard or n1.contains_in_log( "Single shard cluster used with custom_key, transforming replicas into virtual shards" ) @@ -133,3 +133,4 @@ def test_custom_key_different_table_names(start_cluster): "cluster('test_single_shard_multiple_replicas', default.test_table) as d", "test_table", ) + run("dist_table as d", "dist", should_use_virtual_shard=False) From eee7df814926fc323be455c348d0fd8d37e76efc Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 1 Feb 2023 13:33:32 +0000 Subject: [PATCH 032/333] WIP on group_by_use_nulls --- src/Analyzer/ColumnNode.h | 6 ++ src/Analyzer/FunctionNode.cpp | 3 +- src/Analyzer/FunctionNode.h | 13 +++- src/Analyzer/IQueryTreeNode.h | 5 ++ src/Analyzer/Passes/QueryAnalysisPass.cpp | 81 +++++++++++++++++++---- src/Interpreters/ActionsDAG.cpp | 32 ++++++++- src/Interpreters/ActionsDAG.h | 8 +++ src/Planner/PlannerActionsVisitor.cpp | 7 +- src/Planner/PlannerExpressionAnalysis.cpp | 5 +- 9 files changed, 139 insertions(+), 21 deletions(-) diff --git a/src/Analyzer/ColumnNode.h b/src/Analyzer/ColumnNode.h index e378bc5f3d0..79c0e23c86f 100644 --- a/src/Analyzer/ColumnNode.h +++ b/src/Analyzer/ColumnNode.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB { @@ -117,6 +118,11 @@ public: return column.type; } + void convertToNullable() override + { + column.type = makeNullableSafe(column.type); + } + void dumpTreeImpl(WriteBuffer & buffer, FormatState & state, size_t indent) const override; protected: diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp index 7961bfbae31..718dcf4bb58 100644 --- a/src/Analyzer/FunctionNode.cpp +++ b/src/Analyzer/FunctionNode.cpp @@ -99,7 +99,7 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state buffer << ", function_type: " << function_type; if (function) - buffer << ", result_type: " + function->getResultType()->getName(); + buffer << ", result_type: " + getResultType()->getName(); const auto & parameters = getParameters(); if (!parameters.getNodes().empty()) @@ -177,6 +177,7 @@ QueryTreeNodePtr FunctionNode::cloneImpl() const */ result_function->function = function; result_function->kind = kind; + result_function->wrap_with_nullable = wrap_with_nullable; return result_function; } diff --git a/src/Analyzer/FunctionNode.h b/src/Analyzer/FunctionNode.h index 41751ec3f09..0311783b433 100644 --- a/src/Analyzer/FunctionNode.h +++ b/src/Analyzer/FunctionNode.h @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace DB @@ -170,7 +171,16 @@ public: throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Function node with name '{}' is not resolved", function_name); - return function->getResultType(); + auto type = function->getResultType(); + if (wrap_with_nullable) + return makeNullableSafe(type); + return type; + } + + void convertToNullable() override + { + chassert(kind == 
FunctionKind::ORDINARY); + wrap_with_nullable = true; } void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; @@ -188,6 +198,7 @@ private: String function_name; FunctionKind kind = FunctionKind::UNKNOWN; IResolvedFunctionPtr function; + bool wrap_with_nullable = false; static constexpr size_t parameters_child_index = 0; static constexpr size_t arguments_child_index = 1; diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h index 8aa834e60b7..3c8e73b9e31 100644 --- a/src/Analyzer/IQueryTreeNode.h +++ b/src/Analyzer/IQueryTreeNode.h @@ -90,6 +90,11 @@ public: throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Method getResultType is not supported for {} query node", getNodeTypeName()); } + virtual void convertToNullable() + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Method convertToNullable is not supported for {} query node", getNodeTypeName()); + } + /** Is tree equal to other tree with node root. * * Aliases of query tree nodes are compared during isEqual call. diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 7d1f636754c..4c1d6d309d6 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1,5 +1,6 @@ #include +#include "Common/logger_useful.h" #include #include @@ -446,6 +447,9 @@ public: alias_name_to_expressions[node_alias].push_back(node); } + if (const auto * function = node->as()) + LOG_DEBUG(&Poco::Logger::get("ExpressionsStack"), "Pushed function {} on stack", function->getFunctionName()); + expressions.emplace_back(node); } @@ -464,6 +468,9 @@ public: alias_name_to_expressions.erase(it); } + if (const auto * function = top_expression->as()) + LOG_DEBUG(&Poco::Logger::get("ExpressionsStack"), "Poped function {} on stack", function->getFunctionName()); + expressions.pop_back(); } @@ -482,6 +489,22 @@ public: return alias_name_to_expressions.contains(alias); } + bool hasAggregateFunction() const + { + const auto & factory = AggregateFunctionFactory::instance(); + for (const auto & node : expressions) + { + const auto * function = node->as(); + if (!function) + continue; + + if (factory.isAggregateFunctionName(function->getFunctionName())) + return true; + LOG_DEBUG(&Poco::Logger::get("ExpressionsStack"), "Function {} is being resolved, but is not aggregate", function->getFunctionName()); + } + return false; + } + QueryTreeNodePtr getExpressionWithAlias(const std::string & alias) const { auto expression_it = alias_name_to_expressions.find(alias); @@ -708,6 +731,8 @@ struct IdentifierResolveScope /// Table expression node to data std::unordered_map table_expression_node_to_data; + QueryTreeNodePtrWithHashSet nullable_group_by_keys; + /// Use identifier lookup to result cache bool use_identifier_lookup_to_result_cache = true; @@ -1221,7 +1246,6 @@ private: /// Results of scalar sub queries std::unordered_map> scalars; - }; /// Utility functions implementation @@ -3092,6 +3116,11 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook resolve_result.resolve_place = IdentifierResolvePlace::DATABASE_CATALOG; } + if (resolve_result.resolved_identifier + && scope.nullable_group_by_keys.contains(resolve_result.resolved_identifier) + && !scope.expressions_in_resolve_process_stack.hasAggregateFunction()) + resolve_result.resolved_identifier->convertToNullable(); + it->second = resolve_result; /** If identifier was not resolved, or during expression resolution identifier was explicitly 
added into non cached set, @@ -3099,7 +3128,8 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook */ if (!resolve_result.resolved_identifier || scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup) || - !scope.use_identifier_lookup_to_result_cache) + !scope.use_identifier_lookup_to_result_cache || + scope.context->getSettingsRef().group_by_use_nulls) scope.identifier_lookup_to_result.erase(it); return resolve_result; @@ -4627,6 +4657,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi } function_node.resolveAsFunction(std::move(function_base)); + if (settings.group_by_use_nulls && scope.nullable_group_by_keys.contains(node)) + function_node.convertToNullable(); } catch (Exception & e) { @@ -5873,9 +5905,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier auto & query_node_typed = query_node->as(); const auto & settings = scope.context->getSettingsRef(); - if (settings.group_by_use_nulls) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "GROUP BY use nulls is not supported"); - bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube(); if (query_node_typed.isGroupByWithGroupingSets() && query_node_typed.isGroupByWithTotals()) @@ -6011,16 +6040,11 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier resolveQueryJoinTreeNode(query_node_typed.getJoinTree(), scope, visitor); } - scope.use_identifier_lookup_to_result_cache = true; + if (!settings.group_by_use_nulls) + scope.use_identifier_lookup_to_result_cache = true; /// Resolve query node sections. - auto projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope); - if (query_node_typed.getProjection().getNodes().empty()) - throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, - "Empty list of columns in projection. 
In scope {}", - scope.scope_node->formatASTForErrorMessage()); - if (query_node_typed.hasWith()) resolveExpressionNodeList(query_node_typed.getWithNode(), scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/); @@ -6041,6 +6065,17 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); } + + for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes()) + { + for (const auto & group_by_elem : grouping_set->as()->getNodes()) + scope.nullable_group_by_keys.insert(group_by_elem->clone()); + } + // for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes()) + // { + // for (const auto & group_by_elem : grouping_set->as()->getNodes()) + // group_by_elem->convertToNullable(); + // } } else { @@ -6048,6 +6083,14 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope); resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + if (settings.group_by_use_nulls) + { + for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) + scope.nullable_group_by_keys.insert(group_by_elem->clone()); + // for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) + // group_by_elem->convertToNullable(); + } } } @@ -6100,6 +6143,12 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier validateLimitOffsetExpression(query_node_typed.getOffset(), "OFFSET", scope); } + auto projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope); + if (query_node_typed.getProjection().getNodes().empty()) + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, + "Empty list of columns in projection. In scope {}", + scope.scope_node->formatASTForErrorMessage()); + /** Resolve nodes with duplicate aliases. * Table expressions cannot have duplicate aliases. 
* @@ -6234,7 +6283,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (grouping_set_key->as()) continue; - group_by_keys_nodes.push_back(grouping_set_key); + group_by_keys_nodes.push_back(grouping_set_key->clone()); + if (settings.group_by_use_nulls) + group_by_keys_nodes.back()->convertToNullable(); } } else @@ -6242,7 +6293,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (node->as()) continue; - group_by_keys_nodes.push_back(node); + group_by_keys_nodes.push_back(node->clone()); + if (settings.group_by_use_nulls) + group_by_keys_nodes.back()->convertToNullable(); } } diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 5f1398fed39..4e7dfe2bef5 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -199,6 +200,23 @@ const ActionsDAG::Node & ActionsDAG::addFunction( std::move(children), std::move(arguments), std::move(result_name), + function_base->getResultType(), + all_const); +} + +const ActionsDAG::Node & ActionsDAG::addFunction( + const FunctionNode & function, + NodeRawConstPtrs children, + std::string result_name) +{ + auto [arguments, all_const] = getFunctionArguments(children); + + return addFunctionImpl( + function.getFunction(), + std::move(children), + std::move(arguments), + std::move(result_name), + function.getResultType(), all_const); } @@ -214,6 +232,7 @@ const ActionsDAG::Node & ActionsDAG::addFunction( std::move(children), std::move(arguments), std::move(result_name), + function_base->getResultType(), all_const); } @@ -238,6 +257,7 @@ const ActionsDAG::Node & ActionsDAG::addFunctionImpl( NodeRawConstPtrs children, ColumnsWithTypeAndName arguments, std::string result_name, + DataTypePtr result_type, bool all_const) { size_t num_arguments = children.size(); @@ -247,7 +267,7 @@ const ActionsDAG::Node & ActionsDAG::addFunctionImpl( node.children = std::move(children); node.function_base = function_base; - node.result_type = node.function_base->getResultType(); + node.result_type = result_type; node.function = node.function_base->prepare(arguments); node.is_deterministic = node.function_base->isDeterministic(); @@ -2236,7 +2256,15 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( for (const auto & child : node->children) function_children.push_back(node_to_result_node.find(child)->second); - result_node = &result_dag->addFunction(node->function_base, std::move(function_children), {}); + auto [arguments, all_const] = getFunctionArguments(function_children); + + result_node = &result_dag->addFunctionImpl( + node->function_base, + std::move(function_children), + std::move(arguments), + {}, + node->result_type, + all_const); break; } } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 40bc76fe057..0b240feea0e 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -23,6 +23,8 @@ using FunctionBasePtr = std::shared_ptr; class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; +class FunctionNode; + class IDataType; using DataTypePtr = std::shared_ptr; @@ -139,10 +141,15 @@ public: const FunctionOverloadResolverPtr & function, NodeRawConstPtrs children, std::string result_name); + const Node & addFunction( + const FunctionNode & function, + NodeRawConstPtrs children, + std::string result_name); const Node & addFunction( const FunctionBasePtr & function_base, NodeRawConstPtrs children, 
std::string result_name); + const Node & addCast(const Node & node_to_cast, const DataTypePtr & cast_type); /// Find first column by name in output nodes. This search is linear. @@ -357,6 +364,7 @@ private: NodeRawConstPtrs children, ColumnsWithTypeAndName arguments, std::string result_name, + DataTypePtr result_type, bool all_const); #if USE_EMBEDDED_COMPILER diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 95edd93dd9f..13c88d47ca3 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -29,6 +29,7 @@ #include #include #include +#include namespace DB { @@ -82,6 +83,7 @@ public: node_name, actions_dag->dumpNames()); + LOG_DEBUG(&Poco::Logger::get("ActionsScopeNode"), "Node: {} {}", it->second->result_name, it->second->result_type->getName()); return it->second; } @@ -122,7 +124,7 @@ public: } template - const ActionsDAG::Node * addFunctionIfNecessary(const std::string & node_name, ActionsDAG::NodeRawConstPtrs children, FunctionOrOverloadResolver function) + const ActionsDAG::Node * addFunctionIfNecessary(const std::string & node_name, ActionsDAG::NodeRawConstPtrs children, const FunctionOrOverloadResolver & function) { auto it = node_name_to_node.find(node_name); if (it != node_name_to_node.end()) @@ -225,6 +227,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitColumn(const QueryTreeNodePtr & node) { auto column_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name); + LOG_DEBUG(&Poco::Logger::get("PlannerActionsVisitorImpl"), "Processing column with name: {}", column_node_name); const auto & column_node = node->as(); Int64 actions_stack_size = static_cast(actions_stack.size() - 1); @@ -445,7 +448,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi } else { - actions_stack[level].addFunctionIfNecessary(function_node_name, children, function_node.getFunction()); + actions_stack[level].addFunctionIfNecessary(function_node_name, children, function_node); } size_t actions_stack_size = actions_stack.size(); diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index b1017c99c3e..3df6ef640c7 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -299,6 +299,7 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, { const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); const auto & projection_input = chain_available_output_columns ? 
*chain_available_output_columns : join_tree_input_columns; + LOG_DEBUG(&Poco::Logger::get("PlannerExpressionAnalysis"), "Projection node: {}", query_node.getProjectionNode()->dumpTree()); auto projection_actions = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), projection_input, planner_context); auto projection_columns = query_node.getProjectionColumns(); @@ -320,6 +321,7 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, for (size_t i = 0; i < projection_outputs_size; ++i) { auto & projection_column = projection_columns[i]; + LOG_DEBUG(&Poco::Logger::get("PlannerExpressionAnalysis"), "Projection column {}: {} {}", i, projection_column.name, projection_column.type->getName()); const auto * projection_node = projection_actions_outputs[i]; const auto & projection_node_name = projection_node->result_name; @@ -436,7 +438,6 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo } auto window_analysis_result_optional = analyzeWindow(query_tree, join_tree_input_columns, planner_context, actions_chain); - auto projection_analysis_result = analyzeProjection(query_node, join_tree_input_columns, planner_context, actions_chain); std::optional sort_analysis_result_optional; if (query_node.hasOrderBy()) @@ -447,6 +448,8 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo if (query_node.hasLimitBy()) limit_by_analysis_result_optional = analyzeLimitBy(query_node, join_tree_input_columns, planner_context, actions_chain); + auto projection_analysis_result = analyzeProjection(query_node, join_tree_input_columns, planner_context, actions_chain); + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); const auto & project_names_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; auto project_names_actions = std::make_shared(project_names_input); From f505b798da9282f7b1e2dfa8481bb299a5f365af Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 1 Feb 2023 17:18:15 +0000 Subject: [PATCH 033/333] Fix ActionDAG construction with group_by_use_nulls --- src/Planner/PlannerExpressionAnalysis.cpp | 80 +++++++++++++---------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 3df6ef640c7..c7d38839c85 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -16,6 +16,8 @@ #include #include #include +#include "Common/tests/gtest_global_context.h" +#include "DataTypes/DataTypeNullable.h" namespace DB { @@ -33,12 +35,11 @@ namespace * It is client responsibility to update filter analysis result if filter column must be removed after chain is finalized. */ FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_node, - const ColumnsWithTypeAndName & join_tree_input_columns, + const ColumnsWithTypeAndName & current_output_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); - const auto & filter_input = chain_available_output_columns ? 
*chain_available_output_columns : join_tree_input_columns; + const auto & filter_input = current_output_columns; FilterAnalysisResult result; @@ -52,8 +53,8 @@ FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_no /** Construct aggregation analysis result if query tree has GROUP BY or aggregates. * Actions before aggregation are added into actions chain, if result is not null optional. */ -std::optional analyzeAggregation(const QueryTreeNodePtr & query_tree, - const ColumnsWithTypeAndName & join_tree_input_columns, +std::pair, std::optional> analyzeAggregation(const QueryTreeNodePtr & query_tree, + const ColumnsWithTypeAndName & current_output_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { @@ -62,15 +63,12 @@ std::optional analyzeAggregation(const QueryTreeNodeP auto aggregate_function_nodes = collectAggregateFunctionNodes(query_tree); auto aggregates_descriptions = extractAggregateDescriptions(aggregate_function_nodes, *planner_context); - ColumnsWithTypeAndName aggregates_columns; - aggregates_columns.reserve(aggregates_descriptions.size()); - for (auto & aggregate_description : aggregates_descriptions) - aggregates_columns.emplace_back(nullptr, aggregate_description.function->getResultType(), aggregate_description.column_name); + ColumnsWithTypeAndName aggregated_columns; + aggregated_columns.reserve(aggregates_descriptions.size()); Names aggregation_keys; - const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); - const auto & group_by_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + const auto & group_by_input = current_output_columns; ActionsDAGPtr before_aggregation_actions = std::make_shared(group_by_input); before_aggregation_actions->getOutputs().clear(); @@ -83,6 +81,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP PlannerActionsVisitor actions_visitor(planner_context); /// Add expressions from GROUP BY + bool group_by_use_nulls = planner_context->getQueryContext()->getSettingsRef().group_by_use_nulls; if (query_node.hasGroupBy()) { @@ -107,6 +106,8 @@ std::optional analyzeAggregation(const QueryTreeNodeP if (before_aggregation_actions_output_node_names.contains(expression_dag_node->result_name)) continue; + auto expression_type_after_aggregation = group_by_use_nulls ? makeNullableSafe(expression_dag_node->result_type) : expression_dag_node->result_type; + aggregated_columns.emplace_back(nullptr, expression_type_after_aggregation, expression_dag_node->result_name); aggregation_keys.push_back(expression_dag_node->result_name); before_aggregation_actions->getOutputs().push_back(expression_dag_node); before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); @@ -150,6 +151,8 @@ std::optional analyzeAggregation(const QueryTreeNodeP if (before_aggregation_actions_output_node_names.contains(expression_dag_node->result_name)) continue; + auto expression_type_after_aggregation = group_by_use_nulls ? 
makeNullableSafe(expression_dag_node->result_type) : expression_dag_node->result_type; + aggregated_columns.emplace_back(nullptr, expression_type_after_aggregation, expression_dag_node->result_name); aggregation_keys.push_back(expression_dag_node->result_name); before_aggregation_actions->getOutputs().push_back(expression_dag_node); before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); @@ -157,6 +160,9 @@ std::optional analyzeAggregation(const QueryTreeNodeP } } + for (auto & aggregate_description : aggregates_descriptions) + aggregated_columns.emplace_back(nullptr, aggregate_description.function->getResultType(), aggregate_description.column_name); + /// Add expressions from aggregate functions arguments for (auto & aggregate_function_node : aggregate_function_nodes) @@ -183,10 +189,10 @@ std::optional analyzeAggregation(const QueryTreeNodeP * With set number, which is used as an additional key at the stage of merging aggregating data. */ if (query_node.isGroupByWithRollup() || query_node.isGroupByWithCube() || query_node.isGroupByWithGroupingSets()) - aggregates_columns.emplace_back(nullptr, std::make_shared(), "__grouping_set"); + aggregated_columns.emplace_back(nullptr, std::make_shared(), "__grouping_set"); /// Only aggregation keys and aggregates are available for next steps after GROUP BY step - auto aggregate_step = std::make_unique(before_aggregation_actions, ActionsChainStep::AvailableOutputColumnsStrategy::OUTPUT_NODES, aggregates_columns); + auto aggregate_step = std::make_unique(before_aggregation_actions, ActionsChainStep::AvailableOutputColumnsStrategy::OUTPUT_NODES, aggregated_columns); actions_chain.addStep(std::move(aggregate_step)); AggregationAnalysisResult aggregation_analysis_result; @@ -196,14 +202,14 @@ std::optional analyzeAggregation(const QueryTreeNodeP aggregation_analysis_result.grouping_sets_parameters_list = std::move(grouping_sets_parameters_list); aggregation_analysis_result.group_by_with_constant_keys = group_by_with_constant_keys; - return aggregation_analysis_result; + return { aggregation_analysis_result, aggregated_columns }; } /** Construct window analysis result if query tree has window functions. * Actions before window functions are added into actions chain, if result is not null optional. */ std::optional analyzeWindow(const QueryTreeNodePtr & query_tree, - const ColumnsWithTypeAndName & join_tree_input_columns, + const ColumnsWithTypeAndName & current_output_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { @@ -213,8 +219,7 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query auto window_descriptions = extractWindowDescriptions(window_function_nodes, *planner_context); - const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); - const auto & window_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + const auto & window_input = current_output_columns; PlannerActionsVisitor actions_visitor(planner_context); @@ -293,12 +298,11 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query * It is client responsibility to update projection analysis result with project names actions after chain is finalized. 
*/ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, - const ColumnsWithTypeAndName & join_tree_input_columns, + const ColumnsWithTypeAndName & current_output_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); - const auto & projection_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + const auto & projection_input = current_output_columns; LOG_DEBUG(&Poco::Logger::get("PlannerExpressionAnalysis"), "Projection node: {}", query_node.getProjectionNode()->dumpTree()); auto projection_actions = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), projection_input, planner_context); @@ -344,12 +348,11 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, * Actions before sort are added into actions chain. */ SortAnalysisResult analyzeSort(const QueryNode & query_node, - const ColumnsWithTypeAndName & join_tree_input_columns, + const ColumnsWithTypeAndName & current_output_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - const auto *chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); - const auto & order_by_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + const auto & order_by_input = current_output_columns; ActionsDAGPtr before_sort_actions = std::make_shared(order_by_input); auto & before_sort_actions_outputs = before_sort_actions->getOutputs(); @@ -388,12 +391,11 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, * Actions before limit by are added into actions chain. */ LimitByAnalysisResult analyzeLimitBy(const QueryNode & query_node, - const ColumnsWithTypeAndName & join_tree_input_columns, + const ColumnsWithTypeAndName & current_output_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); - const auto & limit_by_input = chain_available_output_columns ? 
*chain_available_output_columns : join_tree_input_columns; + const auto & limit_by_input = current_output_columns; auto before_limit_by_actions = buildActionsDAGFromExpressionNode(query_node.getLimitByNode(), limit_by_input, planner_context); Names limit_by_column_names; @@ -420,35 +422,47 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo std::optional where_analysis_result_optional; std::optional where_action_step_index_optional; + ColumnsWithTypeAndName current_output_columns = join_tree_input_columns; + if (query_node.hasWhere()) { - where_analysis_result_optional = analyzeFilter(query_node.getWhere(), join_tree_input_columns, planner_context, actions_chain); + where_analysis_result_optional = analyzeFilter(query_node.getWhere(), current_output_columns, planner_context, actions_chain); where_action_step_index_optional = actions_chain.getLastStepIndex(); + current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); } - auto aggregation_analysis_result_optional = analyzeAggregation(query_tree, join_tree_input_columns, planner_context, actions_chain); + auto [aggregation_analysis_result_optional, aggregated_columns_optional] = analyzeAggregation(query_tree, current_output_columns, planner_context, actions_chain); + if (aggregated_columns_optional) + current_output_columns = std::move(*aggregated_columns_optional); std::optional having_analysis_result_optional; std::optional having_action_step_index_optional; if (query_node.hasHaving()) { - having_analysis_result_optional = analyzeFilter(query_node.getHaving(), join_tree_input_columns, planner_context, actions_chain); + having_analysis_result_optional = analyzeFilter(query_node.getHaving(), current_output_columns, planner_context, actions_chain); having_action_step_index_optional = actions_chain.getLastStepIndex(); + current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); } - auto window_analysis_result_optional = analyzeWindow(query_tree, join_tree_input_columns, planner_context, actions_chain); + auto window_analysis_result_optional = analyzeWindow(query_tree, current_output_columns, planner_context, actions_chain); std::optional sort_analysis_result_optional; if (query_node.hasOrderBy()) - sort_analysis_result_optional = analyzeSort(query_node, join_tree_input_columns, planner_context, actions_chain); + { + sort_analysis_result_optional = analyzeSort(query_node, current_output_columns, planner_context, actions_chain); + current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); + } std::optional limit_by_analysis_result_optional; if (query_node.hasLimitBy()) - limit_by_analysis_result_optional = analyzeLimitBy(query_node, join_tree_input_columns, planner_context, actions_chain); + { + limit_by_analysis_result_optional = analyzeLimitBy(query_node, current_output_columns, planner_context, actions_chain); + current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); + } - auto projection_analysis_result = analyzeProjection(query_node, join_tree_input_columns, planner_context, actions_chain); + auto projection_analysis_result = analyzeProjection(query_node, current_output_columns, planner_context, actions_chain); const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); const auto & project_names_input = chain_available_output_columns ? 
*chain_available_output_columns : join_tree_input_columns; From 7e40ab7658e1ad02acbd8ddf7067f4091a07338e Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 1 Feb 2023 18:04:19 +0000 Subject: [PATCH 034/333] Update output columns after WindowStep --- src/Planner/PlannerExpressionAnalysis.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index c7d38839c85..79feb501f37 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -446,6 +446,8 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo } auto window_analysis_result_optional = analyzeWindow(query_tree, current_output_columns, planner_context, actions_chain); + if (window_analysis_result_optional) + current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); std::optional sort_analysis_result_optional; if (query_node.hasOrderBy()) From d589864828ed31dcba034947375acf98a6897948 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 1 Feb 2023 18:12:13 +0000 Subject: [PATCH 035/333] Cleanup code --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 14 -------------- src/Planner/PlannerActionsVisitor.cpp | 3 --- src/Planner/PlannerExpressionAnalysis.cpp | 5 +---- 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 4c1d6d309d6..003944fa8b0 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -447,9 +447,6 @@ public: alias_name_to_expressions[node_alias].push_back(node); } - if (const auto * function = node->as()) - LOG_DEBUG(&Poco::Logger::get("ExpressionsStack"), "Pushed function {} on stack", function->getFunctionName()); - expressions.emplace_back(node); } @@ -468,9 +465,6 @@ public: alias_name_to_expressions.erase(it); } - if (const auto * function = top_expression->as()) - LOG_DEBUG(&Poco::Logger::get("ExpressionsStack"), "Poped function {} on stack", function->getFunctionName()); - expressions.pop_back(); } @@ -500,7 +494,6 @@ public: if (factory.isAggregateFunctionName(function->getFunctionName())) return true; - LOG_DEBUG(&Poco::Logger::get("ExpressionsStack"), "Function {} is being resolved, but is not aggregate", function->getFunctionName()); } return false; } @@ -6071,11 +6064,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier for (const auto & group_by_elem : grouping_set->as()->getNodes()) scope.nullable_group_by_keys.insert(group_by_elem->clone()); } - // for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes()) - // { - // for (const auto & group_by_elem : grouping_set->as()->getNodes()) - // group_by_elem->convertToNullable(); - // } } else { @@ -6088,8 +6076,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier { for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) scope.nullable_group_by_keys.insert(group_by_elem->clone()); - // for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) - // group_by_elem->convertToNullable(); } } } diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 13c88d47ca3..6bd245858fe 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -29,7 +29,6 @@ #include #include #include -#include namespace DB { @@ -83,7 +82,6 @@ public: node_name, 
actions_dag->dumpNames()); - LOG_DEBUG(&Poco::Logger::get("ActionsScopeNode"), "Node: {} {}", it->second->result_name, it->second->result_type->getName()); return it->second; } @@ -227,7 +225,6 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitColumn(const QueryTreeNodePtr & node) { auto column_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name); - LOG_DEBUG(&Poco::Logger::get("PlannerActionsVisitorImpl"), "Processing column with name: {}", column_node_name); const auto & column_node = node->as(); Int64 actions_stack_size = static_cast(actions_stack.size() - 1); diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 79feb501f37..99815d89168 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include @@ -16,8 +17,6 @@ #include #include #include -#include "Common/tests/gtest_global_context.h" -#include "DataTypes/DataTypeNullable.h" namespace DB { @@ -303,7 +302,6 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, ActionsChain & actions_chain) { const auto & projection_input = current_output_columns; - LOG_DEBUG(&Poco::Logger::get("PlannerExpressionAnalysis"), "Projection node: {}", query_node.getProjectionNode()->dumpTree()); auto projection_actions = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), projection_input, planner_context); auto projection_columns = query_node.getProjectionColumns(); @@ -325,7 +323,6 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, for (size_t i = 0; i < projection_outputs_size; ++i) { auto & projection_column = projection_columns[i]; - LOG_DEBUG(&Poco::Logger::get("PlannerExpressionAnalysis"), "Projection column {}: {} {}", i, projection_column.name, projection_column.type->getName()); const auto * projection_node = projection_actions_outputs[i]; const auto & projection_node_name = projection_node->result_name; From 139a684952d42fd3be4289ca2ada8cfedb7a6591 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 3 Feb 2023 16:00:25 +0000 Subject: [PATCH 036/333] Move analyzeProjection call --- src/Planner/PlannerExpressionAnalysis.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 99815d89168..d0104b0bf2a 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -419,7 +419,8 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo std::optional where_analysis_result_optional; std::optional where_action_step_index_optional; - ColumnsWithTypeAndName current_output_columns = join_tree_input_columns; + const auto * input_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + ColumnsWithTypeAndName current_output_columns = input_columns ? 
*input_columns : join_tree_input_columns; if (query_node.hasWhere()) { @@ -446,6 +447,9 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo if (window_analysis_result_optional) current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); + auto projection_analysis_result = analyzeProjection(query_node, current_output_columns, planner_context, actions_chain); + current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); + std::optional sort_analysis_result_optional; if (query_node.hasOrderBy()) { @@ -461,10 +465,8 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); } - auto projection_analysis_result = analyzeProjection(query_node, current_output_columns, planner_context, actions_chain); - const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); - const auto & project_names_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + const auto & project_names_input = chain_available_output_columns ? *chain_available_output_columns : current_output_columns; auto project_names_actions = std::make_shared(project_names_input); project_names_actions->project(projection_analysis_result.projection_column_names_with_display_aliases); actions_chain.addStep(std::make_unique(project_names_actions)); From a1362b081f5fc4dd8e4fd8762c91800f5e1f0aff Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 3 Feb 2023 17:14:50 +0000 Subject: [PATCH 037/333] Add test --- ...2535_analyzer_group_by_use_nulls.reference | 256 ++++++++++++++++++ .../02535_analyzer_group_by_use_nulls.sql | 85 ++++++ 2 files changed, 341 insertions(+) create mode 100644 tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference create mode 100644 tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql diff --git a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference new file mode 100644 index 00000000000..50755627996 --- /dev/null +++ b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference @@ -0,0 +1,256 @@ +-- { echoOn } +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +1 1 1 +2 0 2 +3 1 3 +4 0 4 +5 1 5 +6 0 6 +7 1 7 +8 0 8 +9 1 9 +\N \N 45 +set optimize_group_by_function_keys = 0; +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N \N 45 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; +0 0 0 +0 0 0 +0 0 45 +1 0 1 +1 1 1 +2 0 2 +2 0 2 +3 0 3 +3 1 3 +4 0 4 +4 0 4 +5 0 5 +5 1 5 +6 0 6 +6 0 6 +7 0 7 +7 1 7 +8 0 8 +8 0 8 +9 0 9 +9 1 9 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N 
0 20 +\N 1 25 +\N \N 45 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; +0 0 0 +0 0 0 +0 0 20 +0 0 45 +0 1 25 +1 0 1 +1 1 1 +2 0 2 +2 0 2 +3 0 3 +3 1 3 +4 0 4 +4 0 4 +5 0 5 +5 1 5 +6 0 6 +6 0 6 +7 0 7 +7 1 7 +8 0 8 +8 0 8 +9 0 9 +9 1 9 +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 1; +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +\N 0 20 +\N 1 25 +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 0; +0 0 0 +0 0 20 +0 1 25 +1 0 1 +2 0 2 +3 0 3 +4 0 4 +5 0 5 +6 0 6 +7 0 7 +8 0 8 +9 0 9 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N \N 45 + +0 0 45 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N 0 20 +\N 1 25 +\N \N 45 + +0 0 45 +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY 1, tuple(val) +SETTINGS group_by_use_nulls = 1, max_bytes_before_external_sort=10; +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +\N 0 20 +\N 1 25 diff --git a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql new file mode 100644 index 00000000000..a4d4f2f8bc9 --- /dev/null +++ b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql @@ -0,0 +1,85 @@ +SET allow_experimental_analyzer=1; + +-- { echoOn } +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +set optimize_group_by_function_keys = 0; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; + +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 1; + +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 
2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 0; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY 1, tuple(val) +SETTINGS group_by_use_nulls = 1, max_bytes_before_external_sort=10; From 3ea3b3545d8c37e20ce5d5674038807030656590 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 3 Feb 2023 18:02:50 +0000 Subject: [PATCH 038/333] Add statefull test --- .../1_stateful/00173_group_by_use_nulls.sql | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/queries/1_stateful/00173_group_by_use_nulls.sql b/tests/queries/1_stateful/00173_group_by_use_nulls.sql index 7acacc4e579..f110ebb0db4 100644 --- a/tests/queries/1_stateful/00173_group_by_use_nulls.sql +++ b/tests/queries/1_stateful/00173_group_by_use_nulls.sql @@ -8,3 +8,17 @@ ORDER BY CounterID ASC LIMIT 10 SETTINGS group_by_use_nulls = 1; + +-- { echoOn } +set allow_experimental_analyzer = 1; + +SELECT + CounterID AS k, + quantileBFloat16(0.5)(ResolutionWidth) +FROM remote('127.0.0.{1,2}', test, hits) +GROUP BY k +ORDER BY + count() DESC, + CounterID ASC +LIMIT 10 +SETTINGS group_by_use_nulls = 1; From aca4f08bf5b2aac5fae6cc897cd0be4b9498b65a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 6 Feb 2023 14:33:45 +0000 Subject: [PATCH 039/333] Correctly calculate size after snapshot load --- src/Coordination/CoordinationSettings.cpp | 2 +- src/Coordination/FourLetterCommand.cpp | 9 +++++++ src/Coordination/FourLetterCommand.h | 13 ++++++++++ src/Coordination/KeeperDispatcher.h | 5 ++++ src/Coordination/KeeperServer.cpp | 5 ++++ src/Coordination/KeeperServer.h | 2 ++ src/Coordination/KeeperSnapshotManager.cpp | 28 +++++++++++++--------- src/Coordination/KeeperStateMachine.cpp | 8 +++++++ src/Coordination/KeeperStateMachine.h | 1 + src/Coordination/KeeperStorage.cpp | 20 ++++++++++++++-- src/Coordination/KeeperStorage.h | 5 +++- src/Coordination/SnapshotableHashTable.h | 13 ++++++++++ 12 files changed, 96 insertions(+), 15 deletions(-) diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index e665ccb89c7..f5c79d3be7a 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -36,7 +36,7 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco } -const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld"; +const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc"; KeeperConfigurationAndSettings::KeeperConfigurationAndSettings() : server_id(NOT_EXIST) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 3e0e5acee0c..6157daad1cd 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -145,6 +145,9 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat FourLetterCommandPtr request_leader_command = 
std::make_shared(keeper_dispatcher); factory.registerCommand(request_leader_command); + FourLetterCommandPtr recalculate_command = std::make_shared(keeper_dispatcher); + factory.registerCommand(recalculate_command); + factory.initializeAllowList(keeper_dispatcher); factory.setInitialize(true); } @@ -515,4 +518,10 @@ String RequestLeaderCommand::run() return keeper_dispatcher.requestLeader() ? "Sent leadership request to leader." : "Failed to send leadership request to leader."; } +String RecalculateCommand::run() +{ + keeper_dispatcher.recalculateStorageStats(); + return "ok"; +} + } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 8a8aacf7a3a..e1fe0333081 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -377,4 +377,17 @@ struct RequestLeaderCommand : public IFourLetterCommand ~RequestLeaderCommand() override = default; }; +/// Recalculate storage stats. +struct RecalculateCommand : public IFourLetterCommand +{ + explicit RecalculateCommand(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "rclc"; } + String run() override; + ~RecalculateCommand() override = default; +}; + } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index ff902d8e036..90965d0934e 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -225,6 +225,11 @@ public: { return server->requestLeader(); } + + void recalculateStorageStats() + { + return server->recalculateStorageStats(); + } }; } diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index b823fbc697c..05f97313edd 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -947,4 +947,9 @@ bool KeeperServer::requestLeader() return isLeader() || raft_instance->request_leadership(); } +void KeeperServer::recalculateStorageStats() +{ + state_machine->recalculateStorageStats(); +} + } diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index feadf3bb7ce..bcff81f66f2 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -137,6 +137,8 @@ public: KeeperLogInfo getKeeperLogInfo(); bool requestLeader(); + + void recalculateStorageStats(); }; } diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index a3f7dbc2c6a..8b80db3e520 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -361,19 +361,25 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial "If you still want to ignore it, you can set 'keeper_server.ignore_system_path_on_startup' to true", error_msg); } - else if (match_result == EXACT && !is_node_empty(node)) + else if (match_result == EXACT) { - if (keeper_context->ignore_system_path_on_startup || keeper_context->server_state != KeeperContext::Phase::INIT) + if (!is_node_empty(node)) { - LOG_ERROR(&Poco::Logger::get("KeeperSnapshotManager"), "{}. Ignoring it", error_msg); - node = KeeperStorage::Node{}; + if (keeper_context->ignore_system_path_on_startup || keeper_context->server_state != KeeperContext::Phase::INIT) + { + LOG_ERROR(&Poco::Logger::get("KeeperSnapshotManager"), "{}. Ignoring it", error_msg); + node = KeeperStorage::Node{}; + } + else + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "{}. Ignoring it can lead to data loss. 

" + "If you still want to ignore it, you can set 'keeper_server.ignore_system_path_on_startup' to true", + error_msg); } - else - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "{}. Ignoring it can lead to data loss. " - "If you still want to ignore it, you can set 'keeper_server.ignore_system_path_on_startup' to true", - error_msg); + + // we always ignore the written size for this node + node.recalculateSize(); } storage.container.insertOrReplace(path, node); @@ -390,7 +396,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial { auto parent_path = parentPath(itr.key); storage.container.updateValue( - parent_path, [path = itr.key](KeeperStorage::Node & value) { value.addChild(getBaseName(path)); }); + parent_path, [version, path = itr.key](KeeperStorage::Node & value) { value.addChild(getBaseName(path), /*update_size*/ version < SnapshotVersion::V4); }); } } diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e591f87c6f1..42ff59f45d9 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -640,4 +640,12 @@ ClusterConfigPtr KeeperStateMachine::getClusterConfig() const return nullptr; } +void KeeperStateMachine::recalculateStorageStats() +{ + std::lock_guard lock(storage_and_responses_lock); + LOG_INFO(log, "Recalculating storage stats"); + storage->recalculateStats(); + LOG_INFO(log, "Done recalculating storage stats"); +} + } diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index ffc7fce1cfe..d8181532f09 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -103,6 +103,7 @@ public: uint64_t getKeyArenaSize() const; uint64_t getLatestSnapshotBufSize() const; + void recalculateStorageStats(); private: /// In our state machine we always have a single snapshot which is stored /// in memory in compressed (serialized) format. 
diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 0e4d631938c..b1f3b44b1e1 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -201,9 +201,10 @@ void KeeperStorage::Node::setData(String new_data) data = std::move(new_data); } -void KeeperStorage::Node::addChild(StringRef child_path) +void KeeperStorage::Node::addChild(StringRef child_path, bool update_size) { - size_bytes += sizeof child_path; + if (update_size) [[likely]] + size_bytes += sizeof child_path; children.insert(child_path); } @@ -234,6 +235,16 @@ void KeeperStorage::Node::shallowCopy(const KeeperStorage::Node & other) cached_digest = other.cached_digest; } +void KeeperStorage::Node::recalculateSize() +{ + size_bytes = sizeof(Node); + + for (const auto child_path : children) + size_bytes += sizeof child_path; + + size_bytes += data.size(); +} + KeeperStorage::KeeperStorage( int64_t tick_time_ms, const String & superdigest_, const KeeperContextPtr & keeper_context_, const bool initialize_system_nodes) : session_expiry_queue(tick_time_ms), keeper_context(keeper_context_), superdigest(superdigest_) @@ -2407,5 +2418,10 @@ uint64_t KeeperStorage::getTotalEphemeralNodesCount() const return ret; } +void KeeperStorage::recalculateStats() +{ + container.recalculateDataSize(); +} + } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index a40cca8e778..be528072df4 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -47,7 +47,7 @@ public: const auto & getData() const noexcept { return data; } - void addChild(StringRef child_path); + void addChild(StringRef child_path, bool update_size = true); void removeChild(StringRef child_path); @@ -64,6 +64,8 @@ public: // (e.g. 
we don't need to copy list of children) void shallowCopy(const Node & other); + void recalculateSize(); + private: String data; ChildrenSet children{}; @@ -466,6 +468,7 @@ public: void dumpWatchesByPath(WriteBufferFromOwnString & buf) const; void dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const; + void recalculateStats(); private: void removeDigest(const Node & node, std::string_view path); void addDigest(const Node & node, std::string_view path); diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index 27572ab86c7..97c7e933334 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -8,6 +8,8 @@ #include #include +#include + namespace DB { @@ -319,6 +321,17 @@ public: return approximate_data_size; } + void recalculateDataSize() + { + approximate_data_size = 0; + for (auto & node : list) + { + node.value.recalculateSize(); + approximate_data_size += node.key.size; + approximate_data_size += node.value.sizeInBytes(); + } + } + uint64_t keyArenaSize() const { return arena.size(); From 9cde46acff502b8ba8972c9c3ccbc13b904b6563 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 6 Feb 2023 16:09:56 +0000 Subject: [PATCH 040/333] Correctly calculate size on update --- src/Coordination/SnapshotableHashTable.h | 15 +-- .../test_keeper_mntr_data_size/__init__.py | 0 .../configs/enable_keeper.xml | 29 ++++++ .../test_keeper_mntr_data_size/test.py | 93 +++++++++++++++++++ 4 files changed, 130 insertions(+), 7 deletions(-) create mode 100644 tests/integration/test_keeper_mntr_data_size/__init__.py create mode 100644 tests/integration/test_keeper_mntr_data_size/configs/enable_keeper.xml create mode 100644 tests/integration/test_keeper_mntr_data_size/test.py diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index 97c7e933334..5b01ca255f8 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -8,8 +8,6 @@ #include #include -#include - namespace DB { @@ -66,7 +64,7 @@ private: /// value_size: size of value to add /// old_value_size: size of value to minus /// old_value_size=0 means there is no old value with the same key. 
- void updateDataSize(OperationType op_type, uint64_t key_size, uint64_t value_size, uint64_t old_value_size) + void updateDataSize(OperationType op_type, uint64_t key_size, uint64_t value_size, uint64_t old_value_size, bool remove_old = true) { switch (op_type) { @@ -96,14 +94,14 @@ private: case UPDATE_VALUE: approximate_data_size += key_size; approximate_data_size += value_size; - if (!snapshot_mode) + if (remove_old) { approximate_data_size -= key_size; approximate_data_size -= old_value_size; } break; case ERASE: - if (!snapshot_mode) + if (remove_old) { approximate_data_size -= key_size; approximate_data_size -= old_value_size; @@ -179,7 +177,7 @@ public: list_itr->value = value; } } - updateDataSize(INSERT_OR_REPLACE, key.size(), value.sizeInBytes(), old_value_size); + updateDataSize(INSERT_OR_REPLACE, key.size(), value.sizeInBytes(), old_value_size, !snapshot_mode); } bool erase(const std::string & key) @@ -224,6 +222,7 @@ public: const_iterator ret; + bool remove_old_size = true; if (snapshot_mode) { /// We in snapshot mode but updating some node which is already more @@ -239,6 +238,8 @@ public: auto itr = list.insert(list.end(), std::move(elem_copy)); it->getMapped() = itr; ret = itr; + + remove_old_size = false; } else { @@ -252,7 +253,7 @@ public: ret = list_itr; } - updateDataSize(UPDATE_VALUE, key.size, ret->value.sizeInBytes(), old_value_size); + updateDataSize(UPDATE_VALUE, key.size, ret->value.sizeInBytes(), old_value_size, remove_old_size); return ret; } diff --git a/tests/integration/test_keeper_mntr_data_size/__init__.py b/tests/integration/test_keeper_mntr_data_size/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_mntr_data_size/configs/enable_keeper.xml b/tests/integration/test_keeper_mntr_data_size/configs/enable_keeper.xml new file mode 100644 index 00000000000..a3217b34501 --- /dev/null +++ b/tests/integration/test_keeper_mntr_data_size/configs/enable_keeper.xml @@ -0,0 +1,29 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 10 + 5 + 5000 + 10000 + trace + + + 0 + 0 + 0 + + + + + 1 + localhost + 9234 + + + + diff --git a/tests/integration/test_keeper_mntr_data_size/test.py b/tests/integration/test_keeper_mntr_data_size/test.py new file mode 100644 index 00000000000..8789ca0354c --- /dev/null +++ b/tests/integration/test_keeper_mntr_data_size/test.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils +import random +import string +import os +import time +from kazoo.client import KazooClient, KazooState + + +cluster = ClickHouseCluster(__file__) + +# clickhouse itself will use external zookeeper +node = cluster.add_instance( + "node", + main_configs=["configs/enable_keeper.xml"], + stay_alive=True, + with_zookeeper=True, +) + + +def random_string(length): + return "".join(random.choices(string.ascii_lowercase + string.digits, k=length)) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def get_connection_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient( + hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + ) + _fake_zk_instance.start() + return _fake_zk_instance + + +def restart_clickhouse(): + node.restart_clickhouse() + keeper_utils.wait_until_connected(cluster, node) + + +def test_mntr_data_size_after_restart(started_cluster): + 
try: + node_zk = None + node_zk = get_connection_zk("node") + + node_zk.create("/test_mntr_data_size", b"somevalue") + for i in range(100): + node_zk.create( + "/test_mntr_data_size/node" + str(i), random_string(123).encode() + ) + + def get_line_with_size(): + return next( + filter( + lambda line: "zk_approximate_data_size" in line, + keeper_utils.send_4lw_cmd(started_cluster, node, "mntr").split( + "\n" + ), + ), + None, + ) + + line_size_before = get_line_with_size() + assert line_size_before != None + + node_zk.stop() + node_zk.close() + node_zk = None + + restart_clickhouse() + + assert get_line_with_size() == line_size_before + + keeper_utils.send_4lw_cmd(started_cluster, node, "rclc") + assert get_line_with_size() == line_size_before + finally: + try: + if node_zk is not None: + node_zk.stop() + node_zk.close() + except: + pass From b11458f14241f81f22129a1476a9220010c2586e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 7 Feb 2023 07:51:22 +0000 Subject: [PATCH 041/333] Fix erase --- src/Coordination/SnapshotableHashTable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index 5b01ca255f8..cfa3098b4a1 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -202,7 +202,7 @@ public: list.erase(list_itr); } - updateDataSize(ERASE, key.size(), 0, old_data_size); + updateDataSize(ERASE, key.size(), 0, old_data_size, !snapshot_mode); return true; } From fe48ab2b1d9006cdb8e0d937ad8d9a9370020c1c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 7 Feb 2023 14:58:34 +0000 Subject: [PATCH 042/333] randomize setting 'ratio_of_defaults_for_sparse_serialization' --- tests/clickhouse-test | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4361b64b62f..e84c9beed5d 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -511,7 +511,9 @@ class SettingsRandomizer: "merge_tree_coarse_index_granularity": lambda: random.randint(2, 32), "optimize_distinct_in_order": lambda: random.randint(0, 1), "optimize_sorting_by_input_stream_properties": lambda: random.randint(0, 1), - "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint(0, 1), + "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint( + 0, 1 + ), } @staticmethod @@ -524,10 +526,9 @@ class SettingsRandomizer: class MergeTreeSettingsRandomizer: settings = { - # Temporary disable due to large number of failures. TODO: fix. 
- # "ratio_of_defaults_for_sparse_serialization": threshold_generator( - # 0.1, 0.6, 0.0, 1.0 - # ), + "ratio_of_defaults_for_sparse_serialization": threshold_generator( + 0.3, 0.5, 0.0, 1.0 + ), "prefer_fetch_merged_part_size_threshold": threshold_generator( 0.2, 0.5, 1, 10 * 1024 * 1024 * 1024 ), @@ -1222,7 +1223,7 @@ class TestCase: ) result.check_if_need_retry(args, stdout, stderr, self.runs_count) # to avoid breaking CSV parser - result.description = result.description.replace('\0', '') + result.description = result.description.replace("\0", "") if result.status == TestStatus.FAIL: result.description = self.add_info_about_settings(result.description) @@ -1678,7 +1679,12 @@ def check_server_started(args): print("\nConnection timeout, will not retry") break except Exception as e: - print("\nUexpected exception, will not retry: ", type(e).__name__, ": ", str(e)) + print( + "\nUexpected exception, will not retry: ", + type(e).__name__, + ": ", + str(e), + ) break print("\nAll connection tries failed") @@ -1955,6 +1961,7 @@ def reportCoverage(args): ) ) + def reportLogStats(args): query = """ WITH From 94407682b4c360b47967a0b02d8ca9eb83767317 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 9 Feb 2023 18:06:15 +0000 Subject: [PATCH 043/333] fix function with sparse columns arguments --- src/Columns/ColumnAggregateFunction.h | 5 +++++ src/Columns/ColumnArray.cpp | 5 +++++ src/Columns/ColumnArray.h | 1 + src/Columns/ColumnCompressed.h | 1 + src/Columns/ColumnConst.h | 5 +++++ src/Columns/ColumnDecimal.h | 5 +++++ src/Columns/ColumnFixedString.h | 5 +++++ src/Columns/ColumnFunction.h | 5 +++++ src/Columns/ColumnLowCardinality.h | 5 +++++ src/Columns/ColumnMap.cpp | 5 +++++ src/Columns/ColumnMap.h | 1 + src/Columns/ColumnNullable.h | 5 +++++ src/Columns/ColumnObject.h | 1 + src/Columns/ColumnSparse.cpp | 13 +++++++++---- src/Columns/ColumnSparse.h | 2 +- src/Columns/ColumnString.h | 5 +++++ src/Columns/ColumnTuple.cpp | 5 +++++ src/Columns/ColumnTuple.h | 1 + src/Columns/ColumnUnique.h | 5 +++++ src/Columns/ColumnVector.h | 5 +++++ src/Columns/IColumn.h | 8 +++++++- src/Columns/IColumnDummy.h | 5 +++++ src/Columns/IColumnImpl.h | 10 ++++++++++ src/Functions/IFunction.cpp | 6 ++++-- 24 files changed, 106 insertions(+), 8 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 38040d65d3b..ae486d5d24e 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -223,6 +223,11 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for ColumnAggregateFunction"); } + UInt64 getNumberOfDefaultRows() const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for ColumnAggregateFunction"); + } + void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for ColumnAggregateFunction"); diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 0d085a3275a..cbeebe52141 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -953,6 +953,11 @@ double ColumnArray::getRatioOfDefaultRows(double sample_ratio) const return getRatioOfDefaultRowsImpl(sample_ratio); } +UInt64 ColumnArray::getNumberOfDefaultRows() const +{ + return getNumberOfDefaultRowsImpl(); +} + void ColumnArray::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const { 
return getIndicesOfNonDefaultRowsImpl(indices, from, limit); diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 44652fd0c4b..e60c388831d 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -173,6 +173,7 @@ public: } double getRatioOfDefaultRows(double sample_ratio) const override; + UInt64 getNumberOfDefaultRows() const override; void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index b258dbac878..d747f8ef5f4 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -117,6 +117,7 @@ public: void getExtremes(Field &, Field &) const override { throwMustBeDecompressed(); } size_t byteSizeAt(size_t) const override { throwMustBeDecompressed(); } double getRatioOfDefaultRows(double) const override { throwMustBeDecompressed(); } + UInt64 getNumberOfDefaultRows() const override { throwMustBeDecompressed(); } void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeDecompressed(); } protected: diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index b86ed393e44..fcdcd2ce224 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -253,6 +253,11 @@ public: return data->isDefaultAt(0) ? 1.0 : 0.0; } + UInt64 getNumberOfDefaultRows() const override + { + return data->isDefaultAt(0) ? s : 0; + } + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override { if (!data->isDefaultAt(0)) diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index d47164a178d..03e0b9be558 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -136,6 +136,11 @@ public: return this->template getRatioOfDefaultRowsImpl(sample_ratio); } + UInt64 getNumberOfDefaultRows() const override + { + return this->template getNumberOfDefaultRowsImpl(); + } + void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override { return this->template getIndicesOfNonDefaultRowsImpl(indices, from, limit); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 7c2d9b1a155..39497e3403e 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -200,6 +200,11 @@ public: return getRatioOfDefaultRowsImpl(sample_ratio); } + UInt64 getNumberOfDefaultRows() const override + { + return getNumberOfDefaultRowsImpl(); + } + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override { return getIndicesOfNonDefaultRowsImpl(indices, from, limit); diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 7b7ceb29a10..a1f6245c2bd 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -168,6 +168,11 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for {}", getName()); } + UInt64 getNumberOfDefaultRows() const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for {}", getName()); + } + void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for {}", getName()); diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index e895bc6b54e..1f2dbdc202e 100644 --- a/src/Columns/ColumnLowCardinality.h +++ 
b/src/Columns/ColumnLowCardinality.h @@ -199,6 +199,11 @@ public: return getIndexes().getRatioOfDefaultRows(sample_ratio); } + UInt64 getNumberOfDefaultRows() const override + { + return getIndexes().getNumberOfDefaultRows(); + } + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override { return getIndexes().getIndicesOfNonDefaultRows(indices, from, limit); diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 2b5f5f94ee8..b533b68b98d 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -296,6 +296,11 @@ double ColumnMap::getRatioOfDefaultRows(double sample_ratio) const return getRatioOfDefaultRowsImpl(sample_ratio); } +UInt64 ColumnMap::getNumberOfDefaultRows() const +{ + return getNumberOfDefaultRowsImpl(); +} + void ColumnMap::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const { return getIndicesOfNonDefaultRowsImpl(indices, from, limit); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index db918c3db50..a11905fcaa0 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -92,6 +92,7 @@ public: void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override; bool structureEquals(const IColumn & rhs) const override; double getRatioOfDefaultRows(double sample_ratio) const override; + UInt64 getNumberOfDefaultRows() const override; void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override; void finalize() override { nested->finalize(); } bool isFinalized() const override { return nested->isFinalized(); } diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 85bf095a9d1..df0088debc9 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -156,6 +156,11 @@ public: return getRatioOfDefaultRowsImpl(sample_ratio); } + UInt64 getNumberOfDefaultRows() const override + { + return getNumberOfDefaultRowsImpl(); + } + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override { getIndicesOfNonDefaultRowsImpl(indices, from, limit); diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index d09974fcc46..91920908542 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -254,6 +254,7 @@ public: bool hasEqualValues() const override { throwMustBeConcrete(); } size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } double getRatioOfDefaultRows(double) const override { throwMustBeConcrete(); } + UInt64 getNumberOfDefaultRows() const override { throwMustBeConcrete(); } void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeConcrete(); } private: diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 43802b6bbc8..9bc78ef115d 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -470,7 +470,7 @@ int ColumnSparse::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs bool ColumnSparse::hasEqualValues() const { - size_t num_defaults = getNumberOfDefaults(); + size_t num_defaults = getNumberOfDefaultRows(); if (num_defaults == _size) return true; @@ -512,7 +512,7 @@ void ColumnSparse::getPermutationImpl(IColumn::PermutationSortDirection directio else values->getPermutation(direction, stability, limit + 1, null_direction_hint, perm); - size_t num_of_defaults = getNumberOfDefaults(); + size_t num_of_defaults = getNumberOfDefaultRows(); size_t row = 0; const auto & offsets_data = getOffsetsData(); @@ -677,7 +677,7 
@@ void ColumnSparse::getExtremes(Field & min, Field & max) const return; } - if (getNumberOfDefaults() == 0) + if (getNumberOfDefaultRows() == 0) { size_t min_idx = 1; size_t max_idx = 1; @@ -709,7 +709,12 @@ void ColumnSparse::getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t double ColumnSparse::getRatioOfDefaultRows(double) const { - return static_cast(getNumberOfDefaults()) / _size; + return static_cast(getNumberOfDefaultRows()) / _size; +} + +UInt64 ColumnSparse::getNumberOfDefaultRows() const +{ + return _size - offsets->size(); } MutableColumns ColumnSparse::scatter(ColumnIndex num_columns, const Selector & selector) const diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index ffe392a83c1..d4eb13bf208 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -132,6 +132,7 @@ public: void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override; double getRatioOfDefaultRows(double sample_ratio) const override; + UInt64 getNumberOfDefaultRows() const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; @@ -150,7 +151,6 @@ public: size_t sizeOfValueIfFixed() const override { return values->sizeOfValueIfFixed() + values->sizeOfValueIfFixed(); } bool isCollationSupported() const override { return values->isCollationSupported(); } - size_t getNumberOfDefaults() const { return _size - offsets->size(); } size_t getNumberOfTrailingDefaults() const { return offsets->empty() ? _size : _size - getOffsetsData().back() - 1; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index aa251b1fda0..c9eed212616 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -273,6 +273,11 @@ public: return getRatioOfDefaultRowsImpl(sample_ratio); } + UInt64 getNumberOfDefaultRows() const override + { + return getNumberOfDefaultRowsImpl(); + } + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override { return getIndicesOfNonDefaultRowsImpl(indices, from, limit); diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 903540c1859..369842c7281 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -565,6 +565,11 @@ double ColumnTuple::getRatioOfDefaultRows(double sample_ratio) const return getRatioOfDefaultRowsImpl(sample_ratio); } +UInt64 ColumnTuple::getNumberOfDefaultRows() const +{ + return getNumberOfDefaultRowsImpl(); +} + void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const { return getIndicesOfNonDefaultRowsImpl(indices, from, limit); diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 25f6328b3fc..281fd94d893 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -102,6 +102,7 @@ public: bool isCollationSupported() const override; ColumnPtr compress() const override; double getRatioOfDefaultRows(double sample_ratio) const override; + UInt64 getNumberOfDefaultRows() const override; void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override; void finalize() override; bool isFinalized() const override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 8a95726d2be..377255d80c7 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -146,6 +146,11 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getRatioOfDefaultRows' not implemented for ColumnUnique"); } + UInt64 
getNumberOfDefaultRows() const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getNumberOfDefaultRows' not implemented for ColumnUnique"); + } + void getIndicesOfNonDefaultRows(IColumn::Offsets &, size_t, size_t) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getIndicesOfNonDefaultRows' not implemented for ColumnUnique"); diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index ded66430160..bf790423d1d 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -358,6 +358,11 @@ public: return this->template getRatioOfDefaultRowsImpl(sample_ratio); } + UInt64 getNumberOfDefaultRows() const override + { + return this->template getNumberOfDefaultRowsImpl(); + } + void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override { return this->template getIndicesOfNonDefaultRowsImpl(indices, from, limit); diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 53619c73e5b..d777fd7b240 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -441,10 +441,13 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method structureEquals is not supported for {}", getName()); } - /// Returns ration of values in column, that equal to default value of column. + /// Returns ratio of values in column, that are equal to default value of column. /// Checks only @sample_ratio ratio of rows. [[nodiscard]] virtual double getRatioOfDefaultRows(double sample_ratio = 1.0) const = 0; /// NOLINT + /// Returns number of values in column, that are equal to default value of column. + [[nodiscard]] virtual UInt64 getNumberOfDefaultRows() const = 0; + /// Returns indices of values in column, that not equal to default value of column. virtual void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const = 0; @@ -584,6 +587,9 @@ protected: template double getRatioOfDefaultRowsImpl(double sample_ratio) const; + template + UInt64 getNumberOfDefaultRowsImpl() const; + template void getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 0b00ebbdd1f..82d4c857b29 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -168,6 +168,11 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for {}", getName()); } + UInt64 getNumberOfDefaultRows() const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for {}", getName()); + } + void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getIndicesOfNonDefaultRows is not supported for {}", getName()); diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index 8537a4c097c..e676373e211 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -194,6 +194,16 @@ double IColumn::getRatioOfDefaultRowsImpl(double sample_ratio) const return static_cast(res) / num_checked_rows; } +template +UInt64 IColumn::getNumberOfDefaultRowsImpl() const +{ + UInt64 res = 0; + size_t num_rows = size(); + for (size_t i = 0; i < num_rows; ++i) + res += static_cast(*this).isDefaultAt(i); + return res; +} + template void IColumn::getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const { diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 1c30dee0482..2163fa27c51 100644 --- 
a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -321,7 +321,7 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, const auto * column_sparse = checkAndGetColumn(arguments[i].column.get()); /// In rare case, when sparse column doesn't have default values, /// it's more convenient to convert it to full before execution of function. - if (column_sparse && column_sparse->getNumberOfDefaults()) + if (column_sparse && column_sparse->getNumberOfDefaultRows()) { sparse_column_position = i; ++num_sparse_columns; @@ -359,7 +359,9 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, return res->cloneResized(input_rows_count); /// If default of sparse column is changed after execution of function, convert to full column. - if (!result_type->supportsSparseSerialization() || !res->isDefaultAt(0)) + /// If there are any default in non-zero position after execution of function, convert to full column. + /// Currently there is no easy way to rebuild sparse column with new offsets. + if (!result_type->supportsSparseSerialization() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1) { const auto & offsets_data = assert_cast &>(*sparse_offsets).getData(); return res->createWithOffsets(offsets_data, (*res)[0], input_rows_count, /*shift=*/ 1); From 1fce8e4eedf14f3fa8c1b9bc8ff90ef7a52a311f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 13 Feb 2023 18:35:34 +0000 Subject: [PATCH 044/333] Fix alias resolve with group_by_use_nulls --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 33 +++++++++++++++-------- src/Planner/PlannerExpressionAnalysis.cpp | 3 ++- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 003944fa8b0..2e7d0b2c6db 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1,6 +1,5 @@ #include -#include "Common/logger_useful.h" #include #include @@ -194,7 +193,6 @@ namespace ErrorCodes * TODO: SELECT (compound_expression).*, (compound_expression).COLUMNS are not supported on parser level. * TODO: SELECT a.b.c.*, a.b.c.COLUMNS. Qualified matcher where identifier size is greater than 2 are not supported on parser level. * TODO: Support function identifier resolve from parent query scope, if lambda in parent scope does not capture any columns. - * TODO: Support group_by_use_nulls. * TODO: Scalar subqueries cache. 
*/ @@ -676,7 +674,11 @@ struct IdentifierResolveScope if (auto * union_node = scope_node->as()) context = union_node->getContext(); else if (auto * query_node = scope_node->as()) + { context = query_node->getContext(); + group_by_use_nulls = context->getSettingsRef().group_by_use_nulls && + (query_node->isGroupByWithGroupingSets() || query_node->isGroupByWithRollup() || query_node->isGroupByWithCube()); + } } QueryTreeNodePtr scope_node; @@ -729,6 +731,9 @@ struct IdentifierResolveScope /// Use identifier lookup to result cache bool use_identifier_lookup_to_result_cache = true; + /// Apply nullability to aggregation keys + bool group_by_use_nulls = false; + /// Subquery depth size_t subquery_depth = 0; @@ -3122,7 +3127,7 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook if (!resolve_result.resolved_identifier || scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup) || !scope.use_identifier_lookup_to_result_cache || - scope.context->getSettingsRef().group_by_use_nulls) + scope.group_by_use_nulls) scope.identifier_lookup_to_result.erase(it); return resolve_result; @@ -4650,7 +4655,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi } function_node.resolveAsFunction(std::move(function_base)); - if (settings.group_by_use_nulls && scope.nullable_group_by_keys.contains(node)) + if (scope.group_by_use_nulls && scope.nullable_group_by_keys.contains(node)) function_node.convertToNullable(); } catch (Exception & e) @@ -4795,8 +4800,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::TABLE_EXPRESSION}, scope).resolved_identifier; /// If table identifier is resolved as CTE clone it and resolve - auto * subquery_node = node->as(); - auto * union_node = node->as(); + auto * subquery_node = node ? node->as() : nullptr; + auto * union_node = node ? node->as() : nullptr; bool resolved_as_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE()); if (resolved_as_cte) @@ -4900,6 +4905,12 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id if (result_projection_names.empty()) result_projection_names.push_back(column_node.getColumnName()); + if (scope.group_by_use_nulls && scope.nullable_group_by_keys.contains(node)) + { + node = node->clone(); + node->convertToNullable(); + } + break; } case QueryTreeNodeType::FUNCTION: @@ -4986,7 +4997,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id /** Update aliases after expression node was resolved. * Do not update node in alias table if we resolve it for duplicate alias. */ - if (!node_alias.empty() && use_alias_table) + if (!node_alias.empty() && use_alias_table && !scope.group_by_use_nulls) { auto it = scope.alias_name_to_expression_node.find(node_alias); if (it != scope.alias_name_to_expression_node.end()) @@ -6033,7 +6044,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier resolveQueryJoinTreeNode(query_node_typed.getJoinTree(), scope, visitor); } - if (!settings.group_by_use_nulls) + if (!scope.group_by_use_nulls) scope.use_identifier_lookup_to_result_cache = true; /// Resolve query node sections. 
@@ -6072,7 +6083,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); - if (settings.group_by_use_nulls) + if (scope.group_by_use_nulls) { for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) scope.nullable_group_by_keys.insert(group_by_elem->clone()); @@ -6270,7 +6281,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier continue; group_by_keys_nodes.push_back(grouping_set_key->clone()); - if (settings.group_by_use_nulls) + if (scope.group_by_use_nulls) group_by_keys_nodes.back()->convertToNullable(); } } @@ -6280,7 +6291,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier continue; group_by_keys_nodes.push_back(node->clone()); - if (settings.group_by_use_nulls) + if (scope.group_by_use_nulls) group_by_keys_nodes.back()->convertToNullable(); } } diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index d0104b0bf2a..55a5da247f9 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -80,7 +80,8 @@ std::pair, std::optionalgetQueryContext()->getSettingsRef().group_by_use_nulls; + bool group_by_use_nulls = planner_context->getQueryContext()->getSettingsRef().group_by_use_nulls && + (query_node.isGroupByWithGroupingSets() || query_node.isGroupByWithRollup() || query_node.isGroupByWithCube()); if (query_node.hasGroupBy()) { From 8a3be450c391e020cbb4497b234243e8dbf5d47f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 13 Feb 2023 18:38:07 +0000 Subject: [PATCH 045/333] Add test query --- tests/queries/1_stateful/00173_group_by_use_nulls.sql | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/queries/1_stateful/00173_group_by_use_nulls.sql b/tests/queries/1_stateful/00173_group_by_use_nulls.sql index f110ebb0db4..8531e9efaf8 100644 --- a/tests/queries/1_stateful/00173_group_by_use_nulls.sql +++ b/tests/queries/1_stateful/00173_group_by_use_nulls.sql @@ -9,6 +9,17 @@ ORDER BY LIMIT 10 SETTINGS group_by_use_nulls = 1; +SELECT + CounterID AS k, + quantileBFloat16(0.5)(ResolutionWidth) +FROM test.hits +GROUP BY k +ORDER BY + count() DESC, + CounterID ASC +LIMIT 10 +SETTINGS group_by_use_nulls = 1 FORMAT Null; + -- { echoOn } set allow_experimental_analyzer = 1; From e460aa6272578f53f85a3cb3e108cf0b411abc7a Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 15 Feb 2023 16:00:51 +0000 Subject: [PATCH 046/333] Fix warning --- src/Planner/PlannerActionsVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 6bd245858fe..c9bae0b2c2a 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -327,7 +327,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack.pop_back(); // TODO: Pass IFunctionBase here not FunctionCaptureOverloadResolver. 
- actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), std::move(function_capture)); + actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), function_capture); size_t actions_stack_size = actions_stack.size(); for (size_t i = level + 1; i < actions_stack_size; ++i) From 3730ea388f4e9de81dce547f309f24801d724d2b Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 15 Feb 2023 19:37:54 +0000 Subject: [PATCH 047/333] fix issues with sparse columns --- src/Client/ClientBase.cpp | 2 +- src/Columns/ColumnAggregateFunction.h | 4 ++-- src/Columns/IColumnImpl.h | 11 +++++------ src/Interpreters/Aggregator.cpp | 4 ++++ .../0_stateless/02432_s3_parallel_parts_cleanup.sql | 4 ++-- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index bc8c43af8c6..6c6d22bc037 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -488,7 +488,7 @@ void ClientBase::onTotals(Block & block, ASTPtr parsed_query) void ClientBase::onExtremes(Block & block, ASTPtr parsed_query) { initOutputFormat(block, parsed_query); - output_format->setExtremes(block); + output_format->setExtremes(materializeBlock(block)); } diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index ae486d5d24e..f9ce45708c9 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -220,12 +220,12 @@ public: double getRatioOfDefaultRows(double) const override { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRatioOfDefaultRows is not supported for ColumnAggregateFunction"); + return 0.0; } UInt64 getNumberOfDefaultRows() const override { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getNumberOfDefaultRows is not supported for ColumnAggregateFunction"); + return 0; } void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index e676373e211..20aaefae39c 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -164,13 +164,9 @@ double IColumn::getRatioOfDefaultRowsImpl(double sample_ratio) const throw Exception(ErrorCodes::LOGICAL_ERROR, "Value of 'sample_ratio' must be in interval (0.0; 1.0], but got: {}", sample_ratio); - /// Randomize a little to avoid boundary effects. - std::uniform_int_distribution dist(1, static_cast(1.0 / sample_ratio)); - size_t num_rows = size(); - size_t num_sampled_rows = static_cast(num_rows * sample_ratio); - size_t num_checked_rows = dist(thread_local_rng); - num_sampled_rows = std::min(num_sampled_rows + dist(thread_local_rng), num_rows); + size_t num_sampled_rows = std::min(static_cast(num_rows * sample_ratio), num_rows); + size_t num_checked_rows = 0; size_t res = 0; if (num_sampled_rows == num_rows) @@ -191,6 +187,9 @@ double IColumn::getRatioOfDefaultRowsImpl(double sample_ratio) const } } + if (num_checked_rows == 0) + return 0.0; + return static_cast(res) / num_checked_rows; } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index da4442f99d3..de172a6d7ac 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -1416,6 +1416,10 @@ void Aggregator::prepareAggregateInstructions( materialized_columns.push_back(columns.at(pos)->convertToFullColumnIfConst()); aggregate_columns[i][j] = materialized_columns.back().get(); + /// Sparse columns without defaults may be handled incorrectly. 
+ if (aggregate_columns[i][j]->getNumberOfDefaultRows() == 0) + allow_sparse_arguments = false; + auto full_column = allow_sparse_arguments ? aggregate_columns[i][j]->getPtr() : recursiveRemoveSparse(aggregate_columns[i][j]->getPtr()); diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql index 235a3335d9d..3688a649d5e 100644 --- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql +++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql @@ -7,7 +7,7 @@ drop table if exists rmt2; create table rmt (n int, m int, k int) engine=ReplicatedMergeTree('/test/02432/{database}', '1') order by tuple() settings storage_policy = 's3_cache', allow_remote_fs_zero_copy_replication=1, max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, - max_replicated_merges_in_queue=0, max_replicated_mutations_in_queue=0, min_bytes_for_compact_part=0, min_rows_for_compact_part=0; + max_replicated_merges_in_queue=0, max_replicated_mutations_in_queue=0, min_bytes_for_wide_part=0, min_rows_for_wide_part=0; insert into rmt(n, m) values (1, 42); insert into rmt(n, m) values (2, 42); @@ -37,7 +37,7 @@ select count(), sum(n), sum(m) from rmt; create table rmt2 (n int, m int, k String) engine=ReplicatedMergeTree('/test/02432/{database}', '2') order by tuple() settings storage_policy = 's3_cache', allow_remote_fs_zero_copy_replication=1, max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, - min_bytes_for_compact_part=0, min_rows_for_compact_part=0, max_replicated_merges_in_queue=1, + min_bytes_for_wide_part=0, min_rows_for_wide_part=0, max_replicated_merges_in_queue=1, old_parts_lifetime=0; alter table rmt2 modify column k Nullable(String); From c24d68e7c9bb03d12d61fb5e64023daea26875c6 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 16 Feb 2023 21:53:53 +0000 Subject: [PATCH 048/333] fix tests with sparse columns --- src/Columns/IColumnImpl.h | 6 ++++-- .../0_stateless/01780_column_sparse_distinct.reference | 1 + .../queries/0_stateless/01780_column_sparse_distinct.sql | 2 +- .../0_stateless/02179_sparse_columns_detach.reference | 8 ++++---- tests/queries/0_stateless/02179_sparse_columns_detach.sql | 8 ++++---- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index 20aaefae39c..0eab9452813 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -164,12 +164,14 @@ double IColumn::getRatioOfDefaultRowsImpl(double sample_ratio) const throw Exception(ErrorCodes::LOGICAL_ERROR, "Value of 'sample_ratio' must be in interval (0.0; 1.0], but got: {}", sample_ratio); + static constexpr auto max_number_of_rows_for_full_search = 1000; + size_t num_rows = size(); size_t num_sampled_rows = std::min(static_cast(num_rows * sample_ratio), num_rows); size_t num_checked_rows = 0; size_t res = 0; - if (num_sampled_rows == num_rows) + if (num_sampled_rows == num_rows || num_rows <= max_number_of_rows_for_full_search) { for (size_t i = 0; i < num_rows; ++i) res += static_cast(*this).isDefaultAt(i); @@ -177,7 +179,7 @@ double IColumn::getRatioOfDefaultRowsImpl(double sample_ratio) const } else if (num_sampled_rows != 0) { - for (size_t i = num_checked_rows; i < num_rows; ++i) + for (size_t i = 0; i < num_rows; ++i) { if (num_checked_rows * num_rows <= i * num_sampled_rows) { diff --git 
a/tests/queries/0_stateless/01780_column_sparse_distinct.reference b/tests/queries/0_stateless/01780_column_sparse_distinct.reference index bb0cebc6540..beb45208e7b 100644 --- a/tests/queries/0_stateless/01780_column_sparse_distinct.reference +++ b/tests/queries/0_stateless/01780_column_sparse_distinct.reference @@ -5,3 +5,4 @@ all_2_2_0 v Sparse 2 3 4 +5 diff --git a/tests/queries/0_stateless/01780_column_sparse_distinct.sql b/tests/queries/0_stateless/01780_column_sparse_distinct.sql index 502ca7600d4..e98bada1aac 100644 --- a/tests/queries/0_stateless/01780_column_sparse_distinct.sql +++ b/tests/queries/0_stateless/01780_column_sparse_distinct.sql @@ -7,7 +7,7 @@ SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; SYSTEM STOP MERGES t_sparse_distinct; -INSERT INTO t_sparse_distinct SELECT number, number % 5 FROM numbers(100000); +INSERT INTO t_sparse_distinct SELECT number, number % 6 FROM numbers(100000); INSERT INTO t_sparse_distinct SELECT number, number % 100 = 0 FROM numbers(100000); SELECT name, column, serialization_kind diff --git a/tests/queries/0_stateless/02179_sparse_columns_detach.reference b/tests/queries/0_stateless/02179_sparse_columns_detach.reference index 2f9714f7a97..04a9b10c09f 100644 --- a/tests/queries/0_stateless/02179_sparse_columns_detach.reference +++ b/tests/queries/0_stateless/02179_sparse_columns_detach.reference @@ -1,12 +1,12 @@ -1000 +954 id Default s Sparse -1000 +954 id Default s Sparse -1000 +954 id Default s Sparse -1000 +954 id Default s Sparse diff --git a/tests/queries/0_stateless/02179_sparse_columns_detach.sql b/tests/queries/0_stateless/02179_sparse_columns_detach.sql index 4720e6720ba..2ae088fedb4 100644 --- a/tests/queries/0_stateless/02179_sparse_columns_detach.sql +++ b/tests/queries/0_stateless/02179_sparse_columns_detach.sql @@ -4,8 +4,8 @@ CREATE TABLE t_sparse_detach(id UInt64, s String) ENGINE = MergeTree ORDER BY id SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; -INSERT INTO t_sparse_detach SELECT number, number % 20 = 0 ? toString(number) : '' FROM numbers(10000); -INSERT INTO t_sparse_detach SELECT number, number % 20 = 0 ? toString(number) : '' FROM numbers(10000); +INSERT INTO t_sparse_detach SELECT number, number % 21 = 0 ? toString(number) : '' FROM numbers(10000); +INSERT INTO t_sparse_detach SELECT number, number % 21 = 0 ? toString(number) : '' FROM numbers(10000); OPTIMIZE TABLE t_sparse_detach FINAL; @@ -30,8 +30,8 @@ ALTER TABLE t_sparse_detach MODIFY SETTING vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1; -INSERT INTO t_sparse_detach SELECT number, number % 20 = 0 ? toString(number) : '' FROM numbers(10000); -INSERT INTO t_sparse_detach SELECT number, number % 20 = 0 ? toString(number) : '' FROM numbers(10000); +INSERT INTO t_sparse_detach SELECT number, number % 21 = 0 ? toString(number) : '' FROM numbers(10000); +INSERT INTO t_sparse_detach SELECT number, number % 21 = 0 ? 
toString(number) : '' FROM numbers(10000); OPTIMIZE TABLE t_sparse_detach FINAL; From 81f5f9708df0048010d42a2ddb334c6c1add22bb Mon Sep 17 00:00:00 2001 From: artem-yadr Date: Fri, 17 Feb 2023 01:39:23 +0300 Subject: [PATCH 049/333] Added support for MongoDB Replica Set URI with enumiration --- .../MongoDB/include/Poco/MongoDB/Connection.h | 278 +++++++++--------- base/poco/MongoDB/src/Connection.cpp | 196 ++++++++---- 2 files changed, 283 insertions(+), 191 deletions(-) diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h index de669aa90dd..319e1f93ba0 100644 --- a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h +++ b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h @@ -18,150 +18,154 @@ #define MongoDB_Connection_INCLUDED -#include "Poco/MongoDB/RequestMessage.h" -#include "Poco/MongoDB/ResponseMessage.h" -#include "Poco/Mutex.h" #include "Poco/Net/SocketAddress.h" #include "Poco/Net/StreamSocket.h" +#include "Poco/Mutex.h" +#include "Poco/MongoDB/RequestMessage.h" +#include "Poco/MongoDB/ResponseMessage.h" -namespace Poco +namespace Poco { +namespace MongoDB { + + +class MongoDB_API Connection + /// Represents a connection to a MongoDB server + /// using the MongoDB wire protocol. + /// + /// See https://docs.mongodb.com/manual/reference/mongodb-wire-protocol/ + /// for more information on the wire protocol. { -namespace MongoDB +public: + typedef Poco::SharedPtr Ptr; + + class MongoDB_API SocketFactory + { + public: + SocketFactory(); + /// Creates the SocketFactory. + + virtual ~SocketFactory(); + /// Destroys the SocketFactory. + + virtual Poco::Net::StreamSocket createSocket(const std::string& host, int port, Poco::Timespan connectTimeout, bool secure); + /// Creates a Poco::Net::StreamSocket (if secure is false), or a + /// Poco::Net::SecureStreamSocket (if secure is true) connected to the + /// given host and port number. + /// + /// The default implementation will throw a Poco::NotImplementedException + /// if secure is true. + }; + + Connection(); + /// Creates an unconnected Connection. + /// + /// Use this when you want to connect later on. + + Connection(const std::string& hostAndPort); + /// Creates a Connection connected to the given MongoDB instance at host:port. + /// + /// The host and port must be separated with a colon. + + Connection(const std::string& uri, SocketFactory& socketFactory); + /// Creates a Connection connected to the given MongoDB instance at the + /// given URI. + /// + /// See the corresponding connect() method for more information. + + Connection(const std::string& host, int port); + /// Creates a Connection connected to the given MongoDB instance at host and port. + + Connection(const Poco::Net::SocketAddress& addrs); + /// Creates a Connection connected to the given MongoDB instance at the given address. + + Connection(const Poco::Net::StreamSocket& socket); + /// Creates a Connection connected to the given MongoDB instance using the given socket, + /// which must already be connected. + + virtual ~Connection(); + /// Destroys the Connection. + + Poco::Net::SocketAddress address() const; + /// Returns the address of the MongoDB server. + + std::string uri() const; + /// Returns the uri on which the connection was made. + + void connect(const std::string& hostAndPort); + /// Connects to the given MongoDB server. + /// + /// The host and port must be separated with a colon. 
+ + void connect(const std::string& uri, SocketFactory& socketFactory); + /// Connects to the given MongoDB instance at the given URI. + /// + /// The URI must be in standard MongoDB connection string URI format: + /// + /// mongodb://:@hostname.com:/database-name?options + /// + /// The following options are supported: + /// + /// - ssl: If ssl=true is specified, a custom SocketFactory subclass creating + /// a SecureStreamSocket must be supplied. + /// - connectTimeoutMS: Socket connection timeout in milliseconds. + /// - socketTimeoutMS: Socket send/receive timeout in milliseconds. + /// - authMechanism: Authentication mechanism. Only "SCRAM-SHA-1" (default) + /// and "MONGODB-CR" are supported. + /// + /// Unknown options are silently ignored. + /// + /// Will also attempt to authenticate using the specified credentials, + /// using Database::authenticate(). + /// + /// Throws a Poco::NoPermissionException if authentication fails. + + void connect(const std::string& host, int port); + /// Connects to the given MongoDB server. + + void connect(const Poco::Net::SocketAddress& addrs); + /// Connects to the given MongoDB server. + + void connect(const Poco::Net::StreamSocket& socket); + /// Connects using an already connected socket. + + void disconnect(); + /// Disconnects from the MongoDB server. + + void sendRequest(RequestMessage& request); + /// Sends a request to the MongoDB server. + /// + /// Used for one-way requests without a response. + + void sendRequest(RequestMessage& request, ResponseMessage& response); + /// Sends a request to the MongoDB server and receives the response. + /// + /// Use this when a response is expected: only a "query" or "getmore" + /// request will return a response. + +protected: + void connect(); + +private: + Poco::Net::SocketAddress _address; + Poco::Net::StreamSocket _socket; + std::string _uri; +}; + + +// +// inlines +// +inline Net::SocketAddress Connection::address() const { - - - class MongoDB_API Connection - /// Represents a connection to a MongoDB server - /// using the MongoDB wire protocol. - /// - /// See https://docs.mongodb.com/manual/reference/mongodb-wire-protocol/ - /// for more information on the wire protocol. - { - public: - typedef Poco::SharedPtr Ptr; - - class MongoDB_API SocketFactory - { - public: - SocketFactory(); - /// Creates the SocketFactory. - - virtual ~SocketFactory(); - /// Destroys the SocketFactory. - - virtual Poco::Net::StreamSocket createSocket(const std::string & host, int port, Poco::Timespan connectTimeout, bool secure); - /// Creates a Poco::Net::StreamSocket (if secure is false), or a - /// Poco::Net::SecureStreamSocket (if secure is true) connected to the - /// given host and port number. - /// - /// The default implementation will throw a Poco::NotImplementedException - /// if secure is true. - }; - - Connection(); - /// Creates an unconnected Connection. - /// - /// Use this when you want to connect later on. - - Connection(const std::string & hostAndPort); - /// Creates a Connection connected to the given MongoDB instance at host:port. - /// - /// The host and port must be separated with a colon. - - Connection(const std::string & uri, SocketFactory & socketFactory); - /// Creates a Connection connected to the given MongoDB instance at the - /// given URI. - /// - /// See the corresponding connect() method for more information. - - Connection(const std::string & host, int port); - /// Creates a Connection connected to the given MongoDB instance at host and port. 
- - Connection(const Poco::Net::SocketAddress & addrs); - /// Creates a Connection connected to the given MongoDB instance at the given address. - - Connection(const Poco::Net::StreamSocket & socket); - /// Creates a Connection connected to the given MongoDB instance using the given socket, - /// which must already be connected. - - virtual ~Connection(); - /// Destroys the Connection. - - Poco::Net::SocketAddress address() const; - /// Returns the address of the MongoDB server. - - void connect(const std::string & hostAndPort); - /// Connects to the given MongoDB server. - /// - /// The host and port must be separated with a colon. - - void connect(const std::string & uri, SocketFactory & socketFactory); - /// Connects to the given MongoDB instance at the given URI. - /// - /// The URI must be in standard MongoDB connection string URI format: - /// - /// mongodb://:@hostname.com:/database-name?options - /// - /// The following options are supported: - /// - /// - ssl: If ssl=true is specified, a custom SocketFactory subclass creating - /// a SecureStreamSocket must be supplied. - /// - connectTimeoutMS: Socket connection timeout in milliseconds. - /// - socketTimeoutMS: Socket send/receive timeout in milliseconds. - /// - authMechanism: Authentication mechanism. Only "SCRAM-SHA-1" (default) - /// and "MONGODB-CR" are supported. - /// - /// Unknown options are silently ignored. - /// - /// Will also attempt to authenticate using the specified credentials, - /// using Database::authenticate(). - /// - /// Throws a Poco::NoPermissionException if authentication fails. - - void connect(const std::string & host, int port); - /// Connects to the given MongoDB server. - - void connect(const Poco::Net::SocketAddress & addrs); - /// Connects to the given MongoDB server. - - void connect(const Poco::Net::StreamSocket & socket); - /// Connects using an already connected socket. - - void disconnect(); - /// Disconnects from the MongoDB server. - - void sendRequest(RequestMessage & request); - /// Sends a request to the MongoDB server. - /// - /// Used for one-way requests without a response. - - void sendRequest(RequestMessage & request, ResponseMessage & response); - /// Sends a request to the MongoDB server and receives the response. - /// - /// Use this when a response is expected: only a "query" or "getmore" - /// request will return a response. 
- - protected: - void connect(); - - private: - Poco::Net::SocketAddress _address; - Poco::Net::StreamSocket _socket; - }; - - - // - // inlines - // - inline Net::SocketAddress Connection::address() const - { - return _address; - } - - + return _address; } -} // namespace Poco::MongoDB +inline std::string Connection::uri() const +{ + return _uri; +} + +} } // namespace Poco::MongoDB #endif // MongoDB_Connection_INCLUDED diff --git a/base/poco/MongoDB/src/Connection.cpp b/base/poco/MongoDB/src/Connection.cpp index 56bb192cec2..411b59c774e 100644 --- a/base/poco/MongoDB/src/Connection.cpp +++ b/base/poco/MongoDB/src/Connection.cpp @@ -145,71 +145,159 @@ void Connection::connect(const Poco::Net::StreamSocket& socket) void Connection::connect(const std::string& uri, SocketFactory& socketFactory) { - Poco::URI theURI(uri); - if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri); + std::vector strAddresses; + std::string newURI; - std::string userInfo = theURI.getUserInfo(); - std::string host = theURI.getHost(); - Poco::UInt16 port = theURI.getPort(); - if (port == 0) port = 27017; + if (uri.find(',') != std::string::npos) + { + size_t pos; + size_t head = 0; + if ((pos = uri.find("@")) != std::string::npos) + { + head = pos + 1; + } + else if ((pos = uri.find("://")) != std::string::npos) + { + head = pos + 3; + } - std::string databaseName = theURI.getPath(); - if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1); - if (databaseName.empty()) databaseName = "admin"; + std::string tempstr; + std::string::const_iterator it = uri.begin(); + it += head; + size_t tail = head; + for (;it != uri.end() && *it != '?' && *it != '/'; ++it) + { + tempstr += *it; + tail++; + } - bool ssl = false; - Poco::Timespan connectTimeout; - Poco::Timespan socketTimeout; - std::string authMechanism = Database::AUTH_SCRAM_SHA1; + it = tempstr.begin(); + std::string token; + for (;it != tempstr.end(); ++it) + { + if (*it == ',') + { + newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length()); + strAddresses.push_back(newURI); + token = ""; + } + else + { + token += *it; + } + } + newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length()); + strAddresses.push_back(newURI); + } + else + { + strAddresses.push_back(uri); + } - Poco::URI::QueryParameters params = theURI.getQueryParameters(); - for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it) - { - if (it->first == "ssl") - { - ssl = (it->second == "true"); - } - else if (it->first == "connectTimeoutMS") - { - connectTimeout = static_cast(1000)*Poco::NumberParser::parse(it->second); - } - else if (it->first == "socketTimeoutMS") - { - socketTimeout = static_cast(1000)*Poco::NumberParser::parse(it->second); - } - else if (it->first == "authMechanism") - { - authMechanism = it->second; - } - } + newURI = strAddresses.front(); + Poco::URI theURI(newURI); + if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri); - connect(socketFactory.createSocket(host, port, connectTimeout, ssl)); + std::string userInfo = theURI.getUserInfo(); + std::string databaseName = theURI.getPath(); + if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1); + if (databaseName.empty()) databaseName = "admin"; - if (socketTimeout > 0) - { - _socket.setSendTimeout(socketTimeout); - _socket.setReceiveTimeout(socketTimeout); - } + bool ssl = false; + Poco::Timespan connectTimeout; + Poco::Timespan socketTimeout; + std::string authMechanism = 
Database::AUTH_SCRAM_SHA1; + std::string readPreference="primary"; - if (!userInfo.empty()) - { - std::string username; - std::string password; - std::string::size_type pos = userInfo.find(':'); - if (pos != std::string::npos) - { - username.assign(userInfo, 0, pos++); - password.assign(userInfo, pos, userInfo.size() - pos); - } - else username = userInfo; + Poco::URI::QueryParameters params = theURI.getQueryParameters(); + for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it) + { + if (it->first == "ssl") + { + ssl = (it->second == "true"); + } + else if (it->first == "connectTimeoutMS") + { + connectTimeout = static_cast(1000)*Poco::NumberParser::parse(it->second); + } + else if (it->first == "socketTimeoutMS") + { + socketTimeout = static_cast(1000)*Poco::NumberParser::parse(it->second); + } + else if (it->first == "authMechanism") + { + authMechanism = it->second; + } + else if (it->first == "readPreference") + { + readPreference= it->second; + } + } - Database database(databaseName); - if (!database.authenticate(*this, username, password, authMechanism)) - throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username)); - } + for (std::vector::const_iterator it = strAddresses.cbegin();it != strAddresses.cend(); ++it) + { + newURI = *it; + Poco::URI theURI(newURI); + + std::string host = theURI.getHost(); + Poco::UInt16 port = theURI.getPort(); + if (port == 0) port = 27017; + + connect(socketFactory.createSocket(host, port, connectTimeout, ssl)); + _uri = newURI; + if (socketTimeout > 0) + { + _socket.setSendTimeout(socketTimeout); + _socket.setReceiveTimeout(socketTimeout); + } + if (strAddresses.size() > 1) + { + Poco::MongoDB::QueryRequest request("admin.$cmd"); + request.setNumberToReturn(1); + request.selector().add("isMaster", 1); + Poco::MongoDB::ResponseMessage response; + + sendRequest(request, response); + _uri = newURI; + if (!response.documents().empty()) + { + Poco::MongoDB::Document::Ptr doc = response.documents()[0]; + if (doc->get("ismaster") && readPreference == "primary") + { + break; + } + else if (!doc->get("ismaster") && readPreference == "secondary") + { + break; + } + else if (it + 1 == strAddresses.cend()) + { + throw Poco::URISyntaxException(uri); + } + } + } + } + if (!userInfo.empty()) + { + std::string username; + std::string password; + std::string::size_type pos = userInfo.find(':'); + if (pos != std::string::npos) + { + username.assign(userInfo, 0, pos++); + password.assign(userInfo, pos, userInfo.size() - pos); + } + else username = userInfo; + + Database database(databaseName); + + if (!database.authenticate(*this, username, password, authMechanism)) + throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username)); + } } + void Connection::disconnect() { _socket.close(); From 4a2efc339bdc25ae5332b3f6fcd08a48e4530653 Mon Sep 17 00:00:00 2001 From: artem-yadr Date: Fri, 17 Feb 2023 01:47:36 +0300 Subject: [PATCH 050/333] Header fix --- .../MongoDB/include/Poco/MongoDB/Connection.h | 280 +++++++++--------- 1 file changed, 142 insertions(+), 138 deletions(-) diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h index 319e1f93ba0..699b9652eaf 100644 --- a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h +++ b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h @@ -18,154 +18,158 @@ #define MongoDB_Connection_INCLUDED -#include 
"Poco/Net/SocketAddress.h" -#include "Poco/Net/StreamSocket.h" -#include "Poco/Mutex.h" #include "Poco/MongoDB/RequestMessage.h" #include "Poco/MongoDB/ResponseMessage.h" +#include "Poco/Mutex.h" +#include "Poco/Net/SocketAddress.h" +#include "Poco/Net/StreamSocket.h" -namespace Poco { -namespace MongoDB { - - -class MongoDB_API Connection - /// Represents a connection to a MongoDB server - /// using the MongoDB wire protocol. - /// - /// See https://docs.mongodb.com/manual/reference/mongodb-wire-protocol/ - /// for more information on the wire protocol. +namespace Poco { -public: - typedef Poco::SharedPtr Ptr; - - class MongoDB_API SocketFactory - { - public: - SocketFactory(); - /// Creates the SocketFactory. - - virtual ~SocketFactory(); - /// Destroys the SocketFactory. - - virtual Poco::Net::StreamSocket createSocket(const std::string& host, int port, Poco::Timespan connectTimeout, bool secure); - /// Creates a Poco::Net::StreamSocket (if secure is false), or a - /// Poco::Net::SecureStreamSocket (if secure is true) connected to the - /// given host and port number. - /// - /// The default implementation will throw a Poco::NotImplementedException - /// if secure is true. - }; - - Connection(); - /// Creates an unconnected Connection. - /// - /// Use this when you want to connect later on. - - Connection(const std::string& hostAndPort); - /// Creates a Connection connected to the given MongoDB instance at host:port. - /// - /// The host and port must be separated with a colon. - - Connection(const std::string& uri, SocketFactory& socketFactory); - /// Creates a Connection connected to the given MongoDB instance at the - /// given URI. - /// - /// See the corresponding connect() method for more information. - - Connection(const std::string& host, int port); - /// Creates a Connection connected to the given MongoDB instance at host and port. - - Connection(const Poco::Net::SocketAddress& addrs); - /// Creates a Connection connected to the given MongoDB instance at the given address. - - Connection(const Poco::Net::StreamSocket& socket); - /// Creates a Connection connected to the given MongoDB instance using the given socket, - /// which must already be connected. - - virtual ~Connection(); - /// Destroys the Connection. - - Poco::Net::SocketAddress address() const; - /// Returns the address of the MongoDB server. - - std::string uri() const; - /// Returns the uri on which the connection was made. - - void connect(const std::string& hostAndPort); - /// Connects to the given MongoDB server. - /// - /// The host and port must be separated with a colon. - - void connect(const std::string& uri, SocketFactory& socketFactory); - /// Connects to the given MongoDB instance at the given URI. - /// - /// The URI must be in standard MongoDB connection string URI format: - /// - /// mongodb://:@hostname.com:/database-name?options - /// - /// The following options are supported: - /// - /// - ssl: If ssl=true is specified, a custom SocketFactory subclass creating - /// a SecureStreamSocket must be supplied. - /// - connectTimeoutMS: Socket connection timeout in milliseconds. - /// - socketTimeoutMS: Socket send/receive timeout in milliseconds. - /// - authMechanism: Authentication mechanism. Only "SCRAM-SHA-1" (default) - /// and "MONGODB-CR" are supported. - /// - /// Unknown options are silently ignored. - /// - /// Will also attempt to authenticate using the specified credentials, - /// using Database::authenticate(). - /// - /// Throws a Poco::NoPermissionException if authentication fails. 
- - void connect(const std::string& host, int port); - /// Connects to the given MongoDB server. - - void connect(const Poco::Net::SocketAddress& addrs); - /// Connects to the given MongoDB server. - - void connect(const Poco::Net::StreamSocket& socket); - /// Connects using an already connected socket. - - void disconnect(); - /// Disconnects from the MongoDB server. - - void sendRequest(RequestMessage& request); - /// Sends a request to the MongoDB server. - /// - /// Used for one-way requests without a response. - - void sendRequest(RequestMessage& request, ResponseMessage& response); - /// Sends a request to the MongoDB server and receives the response. - /// - /// Use this when a response is expected: only a "query" or "getmore" - /// request will return a response. - -protected: - void connect(); - -private: - Poco::Net::SocketAddress _address; - Poco::Net::StreamSocket _socket; - std::string _uri; -}; - - -// -// inlines -// -inline Net::SocketAddress Connection::address() const +namespace MongoDB { - return _address; -} -inline std::string Connection::uri() const + + + class MongoDB_API Connection + /// Represents a connection to a MongoDB server + /// using the MongoDB wire protocol. + /// + /// See https://docs.mongodb.com/manual/reference/mongodb-wire-protocol/ + /// for more information on the wire protocol. + { + public: + typedef Poco::SharedPtr Ptr; + + class MongoDB_API SocketFactory + { + public: + SocketFactory(); + /// Creates the SocketFactory. + + virtual ~SocketFactory(); + /// Destroys the SocketFactory. + + virtual Poco::Net::StreamSocket createSocket(const std::string & host, int port, Poco::Timespan connectTimeout, bool secure); + /// Creates a Poco::Net::StreamSocket (if secure is false), or a + /// Poco::Net::SecureStreamSocket (if secure is true) connected to the + /// given host and port number. + /// + /// The default implementation will throw a Poco::NotImplementedException + /// if secure is true. + }; + + Connection(); + /// Creates an unconnected Connection. + /// + /// Use this when you want to connect later on. + + Connection(const std::string & hostAndPort); + /// Creates a Connection connected to the given MongoDB instance at host:port. + /// + /// The host and port must be separated with a colon. + + Connection(const std::string & uri, SocketFactory & socketFactory); + /// Creates a Connection connected to the given MongoDB instance at the + /// given URI. + /// + /// See the corresponding connect() method for more information. + + Connection(const std::string & host, int port); + /// Creates a Connection connected to the given MongoDB instance at host and port. + + Connection(const Poco::Net::SocketAddress & addrs); + /// Creates a Connection connected to the given MongoDB instance at the given address. + + Connection(const Poco::Net::StreamSocket & socket); + /// Creates a Connection connected to the given MongoDB instance using the given socket, + /// which must already be connected. + + virtual ~Connection(); + /// Destroys the Connection. + + Poco::Net::SocketAddress address() const; + /// Returns the address of the MongoDB server. + + std::string uri() const; + /// Returns the uri on which the connection was made. + + void connect(const std::string & hostAndPort); + /// Connects to the given MongoDB server. + /// + /// The host and port must be separated with a colon. + + void connect(const std::string & uri, SocketFactory & socketFactory); + /// Connects to the given MongoDB instance at the given URI. 
+ /// + /// The URI must be in standard MongoDB connection string URI format: + /// + /// mongodb://:@hostname.com:/database-name?options + /// + /// The following options are supported: + /// + /// - ssl: If ssl=true is specified, a custom SocketFactory subclass creating + /// a SecureStreamSocket must be supplied. + /// - connectTimeoutMS: Socket connection timeout in milliseconds. + /// - socketTimeoutMS: Socket send/receive timeout in milliseconds. + /// - authMechanism: Authentication mechanism. Only "SCRAM-SHA-1" (default) + /// and "MONGODB-CR" are supported. + /// + /// Unknown options are silently ignored. + /// + /// Will also attempt to authenticate using the specified credentials, + /// using Database::authenticate(). + /// + /// Throws a Poco::NoPermissionException if authentication fails. + + void connect(const std::string & host, int port); + /// Connects to the given MongoDB server. + + void connect(const Poco::Net::SocketAddress & addrs); + /// Connects to the given MongoDB server. + + void connect(const Poco::Net::StreamSocket & socket); + /// Connects using an already connected socket. + + void disconnect(); + /// Disconnects from the MongoDB server. + + void sendRequest(RequestMessage & request); + /// Sends a request to the MongoDB server. + /// + /// Used for one-way requests without a response. + + void sendRequest(RequestMessage & request, ResponseMessage & response); + /// Sends a request to the MongoDB server and receives the response. + /// + /// Use this when a response is expected: only a "query" or "getmore" + /// request will return a response. + + protected: + void connect(); + + private: + Poco::Net::SocketAddress _address; + Poco::Net::StreamSocket _socket; + std::string _uri; + }; + + + // + // inlines + // + inline Net::SocketAddress Connection::address() const + { + return _address; + } + inline std::string Connection::uri() const { return _uri; } -} } // namespace Poco::MongoDB + +} +} // namespace Poco::MongoDB #endif // MongoDB_Connection_INCLUDED From 83dbde476b1a9554efa0d1edb6d93e9460964a17 Mon Sep 17 00:00:00 2001 From: artem-yadr Date: Fri, 17 Feb 2023 01:53:42 +0300 Subject: [PATCH 051/333] move brackets and delete newline --- base/poco/MongoDB/src/Connection.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/base/poco/MongoDB/src/Connection.cpp b/base/poco/MongoDB/src/Connection.cpp index 411b59c774e..c75211cf9d7 100644 --- a/base/poco/MongoDB/src/Connection.cpp +++ b/base/poco/MongoDB/src/Connection.cpp @@ -297,7 +297,6 @@ void Connection::connect(const std::string& uri, SocketFactory& socketFactory) } - void Connection::disconnect() { _socket.close(); From 4999d6cb1a0398328ddad048a90d53a0cab89ec7 Mon Sep 17 00:00:00 2001 From: artem-yadr <84010375+artem-yadr@users.noreply.github.com> Date: Fri, 17 Feb 2023 01:58:31 +0300 Subject: [PATCH 052/333] Update Connection.h --- base/poco/MongoDB/include/Poco/MongoDB/Connection.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h index 699b9652eaf..8fd9c7919e4 100644 --- a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h +++ b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h @@ -163,9 +163,9 @@ namespace MongoDB return _address; } inline std::string Connection::uri() const -{ + { return _uri; -} + } } From 08734d4dc0a0ef5f46838248138488e880f757f2 Mon Sep 17 00:00:00 2001 From: artem-yadr Date: Fri, 17 Feb 2023 14:56:21 +0300 Subject: [PATCH 053/333] poco changes are now 
used in MongoDBDictionarySource --- src/Dictionaries/MongoDBDictionarySource.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index bec566c29a6..f61efcab026 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -114,7 +114,11 @@ MongoDBDictionarySource::MongoDBDictionarySource( { if (!uri.empty()) { - Poco::URI poco_uri(uri); + // Connect with URI. + Poco::MongoDB::Connection::SocketFactory socket_factory; + connection->connect(uri, socket_factory); + + Poco::URI poco_uri(connection.uri()); // Parse database from URI. This is required for correctness -- the // cursor is created using database name and collection name, so we have @@ -134,10 +138,6 @@ MongoDBDictionarySource::MongoDBDictionarySource( { user.resize(separator); } - - // Connect with URI. - Poco::MongoDB::Connection::SocketFactory socket_factory; - connection->connect(uri, socket_factory); } else { From 6b6daf7ae8bdaa98081bb5c2bff259f02522d13f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 17 Feb 2023 14:45:08 +0000 Subject: [PATCH 054/333] Automatic style fix --- tests/ci/workflow_approve_rerun_lambda/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index b563a9786c4..fb14dfd2258 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -123,7 +123,7 @@ TRUSTED_CONTRIBUTORS = { "BoloniniD", # Seasoned contributor, HSE "tonickkozlov", # Cloudflare "tylerhannan", # ClickHouse Employee - "myrrc", # Mike Kot, DoubleCloud + "myrrc", # Mike Kot, DoubleCloud ] } From 046e2bb8eac0127f56fdaaedf54bee3cbb8f4fea Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 17 Feb 2023 16:50:20 +0000 Subject: [PATCH 055/333] fix issues with sparse columns --- src/Columns/ColumnSparse.cpp | 6 ++++-- .../01201_read_single_thread_in_order.sql | 2 +- .../01551_mergetree_read_in_order_spread.sql | 2 +- .../01710_projection_optimize_materialize.sql | 2 +- .../0_stateless/02008_materialize_column.sql | 16 ++++++++-------- .../0_stateless/02402_merge_engine_with_view.sql | 4 ++-- 6 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 9bc78ef115d..0e408fca467 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -450,11 +450,13 @@ void ColumnSparse::compareColumn(const IColumn & rhs, size_t rhs_row_num, { const auto & rhs_sparse = assert_cast(rhs); PaddedPODArray nested_result; - values->compareColumn(rhs_sparse.getValuesColumn(), rhs_sparse.getValueIndex(rhs_row_num), + values->compareColumn( + rhs_sparse.getValuesColumn(), + rhs_sparse.getValueIndex(rhs_row_num), nullptr, nested_result, direction, nan_direction_hint); const auto & offsets_data = getOffsetsData(); - compare_results.resize_fill(_size, nested_result[0]); + std::fill(compare_results.begin(), compare_results.end(), nested_result[0]); for (size_t i = 0; i < offsets_data.size(); ++i) compare_results[offsets_data[i]] = nested_result[i + 1]; } diff --git a/tests/queries/0_stateless/01201_read_single_thread_in_order.sql b/tests/queries/0_stateless/01201_read_single_thread_in_order.sql index bfe03192891..71437b2e04c 100644 --- a/tests/queries/0_stateless/01201_read_single_thread_in_order.sql +++ 
b/tests/queries/0_stateless/01201_read_single_thread_in_order.sql @@ -6,7 +6,7 @@ CREATE TABLE t ) ENGINE = MergeTree ORDER BY number -SETTINGS index_granularity = 128; +SETTINGS index_granularity = 128, ratio_of_defaults_for_sparse_serialization = 1.0; SET min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; INSERT INTO t SELECT number FROM numbers(10000000); diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql index 1d21d861e20..41d4ff68824 100644 --- a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-s3-storage, no-random-merge-tree-settings DROP TABLE IF EXISTS data_01551; diff --git a/tests/queries/0_stateless/01710_projection_optimize_materialize.sql b/tests/queries/0_stateless/01710_projection_optimize_materialize.sql index d8251aabaf6..92d3ead828c 100644 --- a/tests/queries/0_stateless/01710_projection_optimize_materialize.sql +++ b/tests/queries/0_stateless/01710_projection_optimize_materialize.sql @@ -1,6 +1,6 @@ drop table if exists z; -create table z (pk Int64, d Date, id UInt64, c UInt64) Engine MergeTree partition by d order by pk ; +create table z (pk Int64, d Date, id UInt64, c UInt64) Engine MergeTree partition by d order by pk settings ratio_of_defaults_for_sparse_serialization = 1.0; insert into z select number, '2021-10-24', intDiv (number, 10000), 1 from numbers(1000000); optimize table z final; diff --git a/tests/queries/0_stateless/02008_materialize_column.sql b/tests/queries/0_stateless/02008_materialize_column.sql index 8a8eb2afe83..a78920d2525 100644 --- a/tests/queries/0_stateless/02008_materialize_column.sql +++ b/tests/queries/0_stateless/02008_materialize_column.sql @@ -8,33 +8,33 @@ INSERT INTO tmp SELECT * FROM system.numbers LIMIT 20; ALTER TABLE tmp MATERIALIZE COLUMN x; -- { serverError 36 } ALTER TABLE tmp ADD COLUMN s String DEFAULT toString(x); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(arraySort(groupArray(x))), groupArray(s) FROM tmp; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+1); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+2); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+3); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp DROP COLUMN s; ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(x+1); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(x+2); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(x+3); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER 
TABLE tmp DROP COLUMN s; DROP TABLE tmp; diff --git a/tests/queries/0_stateless/02402_merge_engine_with_view.sql b/tests/queries/0_stateless/02402_merge_engine_with_view.sql index 64822784845..ae9de1426e7 100644 --- a/tests/queries/0_stateless/02402_merge_engine_with_view.sql +++ b/tests/queries/0_stateless/02402_merge_engine_with_view.sql @@ -1,7 +1,7 @@ -- #40014 -CREATE TABLE m0 (id UInt64) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 1; +CREATE TABLE m0 (id UInt64) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 1, ratio_of_defaults_for_sparse_serialization = 1.0; INSERT INTO m0 SELECT number FROM numbers(10); -CREATE TABLE m1 (id UInt64, s String) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 1; +CREATE TABLE m1 (id UInt64, s String) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 1, ratio_of_defaults_for_sparse_serialization = 1.0; INSERT INTO m1 SELECT number, 'boo' FROM numbers(10); CREATE VIEW m1v AS SELECT id FROM m1; From 8da3594cd8463eb0974d79d3829f5ece93cc7477 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 20 Feb 2023 17:42:56 +0000 Subject: [PATCH 056/333] Fix IPv4/IPv6 serialization/deserialization in binary formats --- src/Formats/CapnProtoUtils.cpp | 2 + src/Functions/array/FunctionArrayMapped.h | 1 + .../Formats/Impl/ArrowColumnToCHColumn.cpp | 37 ++++++++ .../Formats/Impl/AvroRowInputFormat.cpp | 11 +++ .../Formats/Impl/AvroRowOutputFormat.cpp | 14 +++ .../Impl/BSONEachRowRowInputFormat.cpp | 92 +++++++++++++------ .../Impl/BSONEachRowRowOutputFormat.cpp | 18 ++++ .../Formats/Impl/CHColumnToArrowColumn.cpp | 54 +++++++++++ .../Formats/Impl/CapnProtoRowInputFormat.cpp | 3 + .../Formats/Impl/CapnProtoRowOutputFormat.cpp | 5 + .../Formats/Impl/MsgPackRowInputFormat.cpp | 11 +++ .../Formats/Impl/MsgPackRowOutputFormat.cpp | 12 +++ .../Formats/Impl/ORCBlockOutputFormat.cpp | 15 +++ .../02566_ipv4_ipv6_binary_formats.reference | 18 ++++ .../02566_ipv4_ipv6_binary_formats.sh | 45 +++++++++ .../format_schemas/02566_ipv4_ipv6.capnp | 6 ++ 16 files changed, 316 insertions(+), 28 deletions(-) create mode 100644 tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference create mode 100755 tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh create mode 100644 tests/queries/0_stateless/format_schemas/02566_ipv4_ipv6.capnp diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index e5f619faff5..a557b762672 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -317,6 +317,7 @@ static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr case TypeIndex::UInt16: return capnp_type.isUInt16(); case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::IPv4: [[fallthrough]]; case TypeIndex::UInt32: return capnp_type.isUInt32(); case TypeIndex::UInt64: @@ -355,6 +356,7 @@ static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr case TypeIndex::LowCardinality: return checkCapnProtoType(capnp_type, assert_cast(data_type.get())->getDictionaryType(), mode, error_message, column_name); case TypeIndex::FixedString: [[fallthrough]]; + case TypeIndex::IPv6: [[fallthrough]]; case TypeIndex::String: return capnp_type.isText() || capnp_type.isData(); default: diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 5092698d01c..5d758826f6a 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -7,6 +7,7 @@ #include #include #include +#include 
#include #include diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index ef25b5a332f..b7a914427a2 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -527,6 +528,38 @@ static std::shared_ptr getNestedArrowColumn(std::shared_ptr return std::make_shared(array_vector); } +static ColumnWithTypeAndName readIPv6ColumnFromBinaryData(std::shared_ptr & arrow_column, const String & column_name) +{ + size_t total_size = 0; + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) + { + auto & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); + const size_t chunk_length = chunk.length(); + + for (size_t i = 0; i != chunk_length; ++i) + { + /// If at least one value size is not 16 bytes, fallback to reading String column and further cast to IPv6. + if (chunk.value_length(i) != sizeof(IPv6)) + return readColumnWithStringData(arrow_column, column_name); + } + total_size += chunk_length; + } + + auto internal_type = std::make_shared(); + auto internal_column = internal_type->createColumn(); + auto & data = assert_cast(*internal_column).getData(); + data.reserve(total_size * sizeof(IPv6)); + + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) + { + auto & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); + std::shared_ptr buffer = chunk.value_data(); + const auto * raw_data = reinterpret_cast(buffer->data()); + data.insert_assume_reserved(raw_data, raw_data + chunk.length()); + } + return {std::move(internal_column), std::move(internal_type), column_name}; +} + static ColumnWithTypeAndName readColumnFromArrowColumn( std::shared_ptr & arrow_column, const std::string & column_name, @@ -558,7 +591,11 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( { case arrow::Type::STRING: case arrow::Type::BINARY: + { + if (type_hint && isIPv6(type_hint)) + return readIPv6ColumnFromBinaryData(arrow_column, column_name); return readColumnWithStringData(arrow_column, column_name); + } case arrow::Type::FIXED_SIZE_BINARY: return readColumnWithFixedStringData(arrow_column, column_name); case arrow::Type::LARGE_BINARY: diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index eacd29e0db1..26f5a76776e 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -145,6 +145,9 @@ static void insertNumber(IColumn & column, WhichDataType type, T value) case TypeIndex::DateTime64: assert_cast &>(column).insertValue(static_cast(value)); break; + case TypeIndex::IPv4: + assert_cast(column).insertValue(IPv4(static_cast(value))); + break; default: throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type is not compatible with Avro"); } @@ -386,6 +389,14 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node column.insertData(reinterpret_cast(tmp_fixed.data()), tmp_fixed.size()); }; } + else if (target.isIPv6() && fixed_size == sizeof(IPv6)) + { + return [tmp_fixed = std::vector(fixed_size)](IColumn & column, avro::Decoder & decoder) mutable + { + decoder.decodeFixed(tmp_fixed.size(), tmp_fixed); + column.insertData(reinterpret_cast(tmp_fixed.data()), tmp_fixed.size()); + }; + } break; } case avro::AVRO_SYMBOLIC: diff --git 
a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 8483a91df62..1ca56a1c5cc 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -127,6 +127,11 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF { encoder.encodeInt(assert_cast(column).getElement(row_num)); }}; + case TypeIndex::IPv4: + return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeInt(assert_cast(column).getElement(row_num)); + }}; case TypeIndex::Int32: return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) { @@ -205,6 +210,15 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF encoder.encodeFixed(reinterpret_cast(s.data()), s.size()); }}; } + case TypeIndex::IPv6: + { + auto schema = avro::FixedSchema(sizeof(IPv6), "ipv6_" + toString(type_name_increment)); + return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const std::string_view & s = assert_cast(column).getDataAt(row_num).toView(); + encoder.encodeFixed(reinterpret_cast(s.data()), s.size()); + }}; + } case TypeIndex::Enum8: { auto schema = avro::EnumSchema("enum8_" + toString(type_name_increment)); /// type names must be different for different types. diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp index c4ffce2bc65..e0064c282b4 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp @@ -151,6 +151,17 @@ static void readAndInsertInteger(ReadBuffer & in, IColumn & column, const DataTy } } +static void readAndInsertIPv4(ReadBuffer & in, IColumn & column, BSONType bson_type) +{ + /// We expect BSON type Int32 as IPv4 value. 
+ if (bson_type != BSONType::INT32) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON Int32 into column with type IPv4"); + + UInt32 value; + readBinary(value, in); + assert_cast(column).insertValue(IPv4(value)); +} + template static void readAndInsertDouble(ReadBuffer & in, IColumn & column, const DataTypePtr & data_type, BSONType bson_type) { @@ -296,37 +307,52 @@ static void readAndInsertString(ReadBuffer & in, IColumn & column, BSONType bson } } +static void readAndInsertIPv6(ReadBuffer & in, IColumn & column, BSONType bson_type) +{ + if (bson_type != BSONType::BINARY) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into IPv6 column", getBSONTypeName(bson_type)); + + auto size = readBSONSize(in); + auto subtype = getBSONBinarySubtype(readBSONType(in)); + if (subtype != BSONBinarySubtype::BINARY) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON Binary subtype {} into IPv6 column", getBSONBinarySubtypeName(subtype)); + + if (size != sizeof(IPv6)) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Cannot parse value of type IPv6, size of binary data is not equal to the binary size of IPv6 value: {} != {}", + size, + sizeof(IPv6)); + + IPv6 value; + readBinary(value, in); + assert_cast(column).insertValue(value); +} + + static void readAndInsertUUID(ReadBuffer & in, IColumn & column, BSONType bson_type) { - if (bson_type == BSONType::BINARY) - { - auto size = readBSONSize(in); - auto subtype = getBSONBinarySubtype(readBSONType(in)); - if (subtype == BSONBinarySubtype::UUID || subtype == BSONBinarySubtype::UUID_OLD) - { - if (size != sizeof(UUID)) - throw Exception( - ErrorCodes::INCORRECT_DATA, - "Cannot parse value of type UUID, size of binary data is not equal to the binary size of UUID value: {} != {}", - size, - sizeof(UUID)); - - UUID value; - readBinary(value, in); - assert_cast(column).insertValue(value); - } - else - { - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Cannot insert BSON Binary subtype {} into UUID column", - getBSONBinarySubtypeName(subtype)); - } - } - else - { + if (bson_type != BSONType::BINARY) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into UUID column", getBSONTypeName(bson_type)); - } + + auto size = readBSONSize(in); + auto subtype = getBSONBinarySubtype(readBSONType(in)); + if (subtype != BSONBinarySubtype::UUID && subtype != BSONBinarySubtype::UUID_OLD) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot insert BSON Binary subtype {} into UUID column", + getBSONBinarySubtypeName(subtype)); + + if (size != sizeof(UUID)) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Cannot parse value of type UUID, size of binary data is not equal to the binary size of UUID value: {} != {}", + size, + sizeof(UUID)); + + UUID value; + readBinary(value, in); + assert_cast(column).insertValue(value); } void BSONEachRowRowInputFormat::readArray(IColumn & column, const DataTypePtr & data_type, BSONType bson_type) @@ -591,6 +617,16 @@ bool BSONEachRowRowInputFormat::readField(IColumn & column, const DataTypePtr & readAndInsertString(*in, column, bson_type); return true; } + case TypeIndex::IPv4: + { + readAndInsertIPv4(*in, column, bson_type); + return true; + } + case TypeIndex::IPv6: + { + readAndInsertIPv6(*in, column, bson_type); + return true; + } case TypeIndex::UUID: { readAndInsertUUID(*in, column, bson_type); diff --git a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp index 211021b0d78..95dd3079687 100644 
--- a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp @@ -124,6 +124,7 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co case TypeIndex::Date: [[fallthrough]]; case TypeIndex::Date32: [[fallthrough]]; case TypeIndex::Decimal32: [[fallthrough]]; + case TypeIndex::IPv4: [[fallthrough]]; case TypeIndex::Int32: { return size + sizeof(Int32); @@ -168,6 +169,10 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co const auto & string_column = assert_cast(column); return size + sizeof(BSONSizeT) + string_column.getN() + 1; // Size of data + data + \0 or BSON subtype (in case of BSON binary) } + case TypeIndex::IPv6: + { + return size + sizeof(BSONSizeT) + 1 + sizeof(IPv6); // Size of data + BSON binary subtype + 16 bytes of value + } case TypeIndex::UUID: { return size + sizeof(BSONSizeT) + 1 + sizeof(UUID); // Size of data + BSON binary subtype + 16 bytes of value @@ -371,6 +376,19 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da writeBSONString(column, row_num, name, out, settings.bson.output_string_as_string); break; } + case TypeIndex::IPv4: + { + writeBSONNumber(BSONType::INT32, column, row_num, name, out); + break; + } + case TypeIndex::IPv6: + { + writeBSONTypeAndKeyName(BSONType::BINARY, name, out); + writeBSONSize(sizeof(IPv6), out); + writeBSONType(BSONBinarySubtype::BINARY, out); + writeBinary(assert_cast(column).getElement(row_num), out); + break; + } case TypeIndex::UUID: { writeBSONTypeAndKeyName(BSONType::BINARY, name, out); diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index aef54516627..8698b343eb3 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -434,6 +434,46 @@ namespace DB checkStatus(status, write_column->getName(), format_name); } + static void fillArrowArrayWithIPv6ColumnData( + ColumnPtr write_column, + const PaddedPODArray * null_bytemap, + const String & format_name, + arrow::ArrayBuilder* array_builder, + size_t start, + size_t end) + { + const auto & internal_column = assert_cast(*write_column); + const auto & internal_data = internal_column.getData(); + size_t fixed_length = sizeof(IPv6); + arrow::FixedSizeBinaryBuilder & builder = assert_cast(*array_builder); + arrow::Status status; + + PaddedPODArray arrow_null_bytemap = revertNullByteMap(null_bytemap, start, end); + const UInt8 * arrow_null_bytemap_raw_ptr = arrow_null_bytemap.empty() ? nullptr : arrow_null_bytemap.data(); + + const uint8_t * data_start = reinterpret_cast(internal_data.data()) + start * fixed_length; + status = builder.AppendValues(data_start, end - start, reinterpret_cast(arrow_null_bytemap_raw_ptr)); + checkStatus(status, write_column->getName(), format_name); + } + + static void fillArrowArrayWithIPv4ColumnData( + ColumnPtr write_column, + const PaddedPODArray * null_bytemap, + const String & format_name, + arrow::ArrayBuilder* array_builder, + size_t start, + size_t end) + { + const auto & internal_data = assert_cast(*write_column).getData(); + auto & builder = assert_cast(*array_builder); + arrow::Status status; + + PaddedPODArray arrow_null_bytemap = revertNullByteMap(null_bytemap, start, end); + const UInt8 * arrow_null_bytemap_raw_ptr = arrow_null_bytemap.empty() ? 
nullptr : arrow_null_bytemap.data(); + status = builder.AppendValues(&(internal_data.data() + start)->toUnderType(), end - start, reinterpret_cast(arrow_null_bytemap_raw_ptr)); + checkStatus(status, write_column->getName(), format_name); + } + static void fillArrowArrayWithDateColumnData( ColumnPtr write_column, const PaddedPODArray * null_bytemap, @@ -541,6 +581,14 @@ namespace DB else fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); } + else if (isIPv6(column_type)) + { + fillArrowArrayWithIPv6ColumnData(column, null_bytemap, format_name, array_builder, start, end); + } + else if (isIPv4(column_type)) + { + fillArrowArrayWithIPv4ColumnData(column, null_bytemap, format_name, array_builder, start, end); + } else if (isDate(column_type)) { fillArrowArrayWithDateColumnData(column, null_bytemap, format_name, array_builder, start, end); @@ -781,6 +829,12 @@ namespace DB if (isBool(column_type)) return arrow::boolean(); + if (isIPv6(column_type)) + return arrow::fixed_size_binary(sizeof(IPv6)); + + if (isIPv4(column_type)) + return arrow::uint32(); + const std::string type_name = column_type->getFamilyName(); if (const auto * arrow_type_it = std::find_if( internal_type_to_arrow_type.begin(), diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index 58ace9cfca5..9f37bcc3219 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -128,6 +128,9 @@ static void insertUnsignedInteger(IColumn & column, const DataTypePtr & column_t case TypeIndex::UInt64: assert_cast(column).insertValue(value); break; + case TypeIndex::IPv4: + assert_cast(column).insertValue(IPv4(static_cast(value))); + break; default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not an unsigned integer."); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index dcbd5db5f9b..c0f61bbd586 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -111,7 +111,12 @@ static std::optional convertToDynamicValue( case capnp::DynamicValue::Type::INT: return capnp::DynamicValue::Reader(column->getInt(row_num)); case capnp::DynamicValue::Type::UINT: + { + /// IPv4 column doesn't support getUInt method. 
+ if (isIPv4(data_type)) + return capnp::DynamicValue::Reader(assert_cast(column.get())->getElement(row_num)); return capnp::DynamicValue::Reader(column->getUInt(row_num)); + } case capnp::DynamicValue::Type::BOOL: return capnp::DynamicValue::Reader(column->getBool(row_num)); case capnp::DynamicValue::Type::FLOAT: diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index f337eedbb05..5605b87f3a9 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -162,6 +162,11 @@ static void insertInteger(IColumn & column, DataTypePtr type, UInt64 value) assert_cast(column).insertValue(value); break; } + case TypeIndex::IPv4: + { + assert_cast(column).insertValue(IPv4(static_cast(value))); + break; + } default: throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack integer into column with type {}.", type->getName()); } @@ -190,6 +195,12 @@ static void insertString(IColumn & column, DataTypePtr type, const char * value, return; } + if (isIPv6(type) && bin) + { + assert_cast(column).insertData(value, size); + return; + } + if (!isStringOrFixedString(type)) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack string into column with type {}.", type->getName()); diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index a1ed45ec40f..07951d42bc6 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -56,6 +56,11 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr packer.pack_uint32(assert_cast(column).getElement(row_num)); return; } + case TypeIndex::IPv4: + { + packer.pack_uint32(assert_cast(column).getElement(row_num)); + return; + } case TypeIndex::UInt64: { packer.pack_uint64(assert_cast(column).getElement(row_num)); @@ -110,6 +115,13 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr packer.pack_bin_body(string.data(), static_cast(string.size())); return; } + case TypeIndex::IPv6: + { + const std::string_view & data = assert_cast(column).getDataAt(row_num).toView(); + packer.pack_bin(static_cast(data.size())); + packer.pack_bin_body(data.data(), static_cast(data.size())); + return; + } case TypeIndex::Array: { auto nested_type = assert_cast(*data_type).getNestedType(); diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 42c3e178436..86d9560beb9 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -75,6 +75,7 @@ std::unique_ptr ORCBlockOutputFormat::getORCType(const DataTypePtr & return orc::createPrimitiveType(orc::TypeKind::SHORT); } case TypeIndex::UInt32: [[fallthrough]]; + case TypeIndex::IPv4: [[fallthrough]]; case TypeIndex::Int32: { return orc::createPrimitiveType(orc::TypeKind::INT); @@ -109,6 +110,10 @@ std::unique_ptr ORCBlockOutputFormat::getORCType(const DataTypePtr & return orc::createPrimitiveType(orc::TypeKind::STRING); return orc::createPrimitiveType(orc::TypeKind::BINARY); } + case TypeIndex::IPv6: + { + return orc::createPrimitiveType(orc::TypeKind::BINARY); + } case TypeIndex::Nullable: { return getORCType(removeNullable(type)); @@ -309,6 +314,11 @@ void ORCBlockOutputFormat::writeColumn( writeNumbers(orc_column, column, null_bytemap, [](const UInt32 
& value){ return value; }); break; } + case TypeIndex::IPv4: + { + writeNumbers(orc_column, column, null_bytemap, [](const IPv4 & value){ return value.toUnderType(); }); + break; + } case TypeIndex::Int64: { writeNumbers(orc_column, column, null_bytemap, [](const Int64 & value){ return value; }); @@ -339,6 +349,11 @@ void ORCBlockOutputFormat::writeColumn( writeStrings(orc_column, column, null_bytemap); break; } + case TypeIndex::IPv6: + { + writeStrings(orc_column, column, null_bytemap); + break; + } case TypeIndex::DateTime: { writeDateTimes( diff --git a/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference new file mode 100644 index 00000000000..e228d911715 --- /dev/null +++ b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference @@ -0,0 +1,18 @@ +CapnProto +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +Avro +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +Arrow +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +Parquet +ipv6 Nullable(FixedString(16)) +ipv4 Nullable(Int64) +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +ORC +ipv6 Nullable(String) +ipv4 Nullable(Int32) +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +BSONEachRow +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +MsgPack +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 diff --git a/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh new file mode 100755 index 00000000000..e84c06027e8 --- /dev/null +++ b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo "CapnProto" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format CapnProto settings format_schema='format_schemas/02566_ipv4_ipv6:Message'" > 02566_ipv4_ipv6_data.capnp +${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.capnp, auto, 'ipv6 IPv6, ipv4 IPv4') settings format_schema='format_schemas/02566_ipv4_ipv6:Message'" +rm 02566_ipv4_ipv6_data.capnp + +echo "Avro" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format Avro" > 02566_ipv4_ipv6_data.avro +${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.avro, auto, 'ipv6 IPv6, ipv4 IPv4')" +rm 02566_ipv4_ipv6_data.avro + +echo "Arrow" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format Arrow" > 02566_ipv4_ipv6_data.arrow +${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.arrow, auto, 'ipv6 IPv6, ipv4 IPv4')" +rm 02566_ipv4_ipv6_data.arrow + +echo "Parquet" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format Parquet" > 02566_ipv4_ipv6_data.parquet +${CLICKHOUSE_LOCAL} -q "desc file(02566_ipv4_ipv6_data.parquet)" +${CLICKHOUSE_LOCAL} -q "select ipv6, toIPv4(ipv4) from file(02566_ipv4_ipv6_data.parquet, auto, 'ipv6 IPv6, ipv4 UInt32')" +rm 02566_ipv4_ipv6_data.parquet + +echo "ORC" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format ORC" > 02566_ipv4_ipv6_data.orc +${CLICKHOUSE_LOCAL} -q "desc file(02566_ipv4_ipv6_data.orc)" +${CLICKHOUSE_LOCAL} -q "select ipv6, toIPv4(ipv4) from file(02566_ipv4_ipv6_data.orc, auto, 'ipv6 IPv6, ipv4 UInt32')" +rm 02566_ipv4_ipv6_data.orc + +echo "BSONEachRow" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format BSONEachRow" > 02566_ipv4_ipv6_data.bson +${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.bson, auto, 'ipv6 IPv6, ipv4 IPv4')" +rm 02566_ipv4_ipv6_data.bson + +echo "MsgPack" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format MsgPack" > 02566_ipv4_ipv6_data.msgpack +${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.msgpack, auto, 'ipv6 IPv6, ipv4 IPv4')" +rm 02566_ipv4_ipv6_data.msgpack + + diff --git a/tests/queries/0_stateless/format_schemas/02566_ipv4_ipv6.capnp b/tests/queries/0_stateless/format_schemas/02566_ipv4_ipv6.capnp new file mode 100644 index 00000000000..f999043e2d2 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02566_ipv4_ipv6.capnp @@ -0,0 +1,6 @@ +@0xb6ecde1cd54a101d; + +struct Message { + ipv4 @0 :UInt32; + ipv6 @1 :Data; +} From e37f6b545775783bac0a21e8e951c01586ab1778 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 20 Feb 2023 19:50:25 +0000 Subject: [PATCH 057/333] Update docs --- docs/en/interfaces/formats.md | 209 ++++++++++++++++++---------------- 1 file changed, 111 insertions(+), 98 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index e94c6377ae9..f1935131491 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1232,50 +1232,52 @@ Each row is formatted as a single document and each column is formatted as a sin For output it uses the following correspondence between ClickHouse types and BSON types: -| ClickHouse type | BSON Type | 
-|-----------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------| -| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean | -| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | -| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | -| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double | -| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 | -| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 | -| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime | -| [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 | -| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 | -| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 | -| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 | -| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 | -| [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| ClickHouse type | BSON Type | +|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------| +| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean | +| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double | +| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 | +| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 | +| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime | +| [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 | +| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 | +| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 | | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` 
binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled | -| [UUID](/docs/en/sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 | -| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array | -| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array | -| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document | -| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document | +| [UUID](/docs/en/sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 | +| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array | +| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array | +| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document | +| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document | +| [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 | +| [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype | For input it uses the following correspondence between BSON types and ClickHouse types: -| BSON Type | ClickHouse Type | -|------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | -| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | -| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) | -| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | -| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | -| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) | -| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | -| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) | -| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) | +| BSON Type | ClickHouse Type | +|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `\x01` double | 
[Float32/Float64](/docs/en/sql-reference/data-types/float.md) | +| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | +| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) | +| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | +| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | +| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | +| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) | +| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | +| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) | +| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | | `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). @@ -1608,23 +1610,25 @@ See also [Format Schema](#formatschema). The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. 
-| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) | -|--------------------------------|-----------------------------------------------------------|--------------------------------| -| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` | -| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | -| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` | -| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` | -| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` | -| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` | -| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` | -| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` | -| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | +| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) | +|----------------------------------|------------------------------------------------------------------------------------------------------------------------|------------------------------| +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | +| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` | +| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` | +| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` | +| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` | +| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` | +| `ENUM` | [Enum(8\ |16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` | +| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | +| `UINT32` | 
[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` | +| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` | For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting. @@ -1804,21 +1808,23 @@ ClickHouse Avro format supports reading and writing [Avro data files](https://av The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. -| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` | -|---------------------------------------------|----------------------------------------------------------------------------------------------------|------------------------------| -| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` | -| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` | -| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` | -| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` | -| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* | -| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` | -| `enum` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `enum` | -| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` | -| `union(null, T)`, `union(T, null)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(null, T)` | -| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` | -| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** | -| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | -| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | +| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` | +|---------------------------------------------|-----------------------------------------------------------------------------------------------------------------|-------------------------------------------------| +| `boolean`, `int`, `long`, `float`, `double` | [Int(8\ | 16\ |32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` | +| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` | +| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` | +| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` | +| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* | +| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) 
| `fixed(N)` | +| `enum` | [Enum(8\ | 16)](/docs/en/sql-reference/data-types/enum.md) | `enum` | +| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` | +| `union(null, T)`, `union(T, null)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(null, T)` | +| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` | +| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** | +| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | +| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | +| `int` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `int` | +| `fixed(16)` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `fixed(16)` | \* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern) \** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) @@ -1918,28 +1924,30 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. -| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) | -|-----------------------------------------------|-----------------------------------------------------------------|------------------------------| -| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | -| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | -| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | -| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | -| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` | -| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` | -| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` | -| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | -| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` | -| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` | -| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` | -| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` | -| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | -| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | +| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) | 
+|----------------------------------------------------|-----------------------------------------------------------------|------------------------------| +| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | +| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | +| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` | +| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` | +| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` | +| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` | +| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` | +| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` | +| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` | +| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | +| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` | +| `FIXED_LENGTH_BYTE_ARRAY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` | Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. @@ -2006,6 +2014,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | | `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | | `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | +| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` | +| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` | Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. @@ -2053,8 +2063,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. 
-| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) | -|---------------------------------------|---------------------------------------------------------|--------------------------| +| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) | +|---------------------------------------|---------------------------------------------------------------|--------------------------| | `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` | | `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` | | `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` | @@ -2069,6 +2079,7 @@ The table below shows supported data types and how they match ClickHouse [data t | `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` | | `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` | | `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` | +| `-` | [IPv4](/docs/en/sql-reference/data-types/int-uint.md) | `Int` | Other types are not supported. @@ -2263,8 +2274,8 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data ### Data Types Matching {#data-types-matching-msgpack} -| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) | -|--------------------------------------------------------------------|-----------------------------------------------------------|------------------------------------| +| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) | +|--------------------------------------------------------------------|-----------------------------------------------------------------|------------------------------------| | `uint N`, `positive fixint` | [UIntN](/docs/en/sql-reference/data-types/int-uint.md) | `uint N` | | `int N`, `negative fixint` | [IntN](/docs/en/sql-reference/data-types/int-uint.md) | `int N` | | `bool` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `uint 8` | @@ -2277,6 +2288,8 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data | `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `uint 64` | | `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md) | `fixarray`, `array 16`, `array 32` | | `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` | +| `uint 32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `uint 32` | +| `bin 8` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8` | Example: From bf020298100a51ed57f10b0d1f9e2eceedd802bb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 21 Feb 2023 12:19:58 +0000 Subject: [PATCH 058/333] Add pass for transforming or equality chain to single IN --- .../Passes/OrEqualityChainToInPass.cpp | 117 ++++++++++++++++++ src/Analyzer/Passes/OrEqualityChainToInPass.h | 18 +++ src/Analyzer/QueryTreePassManager.cpp | 4 +- 3 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 src/Analyzer/Passes/OrEqualityChainToInPass.cpp create mode 100644 src/Analyzer/Passes/OrEqualityChainToInPass.h diff --git a/src/Analyzer/Passes/OrEqualityChainToInPass.cpp b/src/Analyzer/Passes/OrEqualityChainToInPass.cpp new file mode 100644 index 00000000000..212e8252ce4 --- /dev/null +++ b/src/Analyzer/Passes/OrEqualityChainToInPass.cpp @@ -0,0 +1,117 @@ +#include + +#include + +#include +#include +#include +#include 
+ +namespace DB +{ + +class OrEqualityChainToInVisitor : public InDepthQueryTreeVisitorWithContext<OrEqualityChainToInVisitor> +{ +public: + using Base = InDepthQueryTreeVisitorWithContext<OrEqualityChainToInVisitor>; + + explicit OrEqualityChainToInVisitor(ContextPtr context) + : Base(std::move(context)) + {} + + + void visitImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as<FunctionNode>(); + + if (!function_node || function_node->getFunctionName() != "or") + return; + + QueryTreeNodes or_operands; + + QueryTreeNodePtrWithHashMap<QueryTreeNodes> node_to_equals_functions; + + for (const auto & argument : function_node->getArguments()) + { + auto * argument_function = argument->as<FunctionNode>(); + if (!argument_function) + { + or_operands.push_back(argument); + continue; + } + + /// collect all equality checks (x = value) + if (argument_function->getFunctionName() != "equals") + { + or_operands.push_back(argument); + continue; + } + + const auto & equals_arguments = argument_function->getArguments().getNodes(); + const auto & lhs = equals_arguments[0]; + + const auto * rhs_literal = equals_arguments[1]->as<ConstantNode>(); + if (!rhs_literal) + { + or_operands.push_back(argument); + continue; + } + + node_to_equals_functions[lhs].push_back(argument); + } + + auto in_function_resolver = FunctionFactory::instance().get("in", getContext()); + + for (auto & [lhs, equals_functions] : node_to_equals_functions) + { + const auto & settings = getSettings(); + if (equals_functions.size() < settings.optimize_min_equality_disjunction_chain_length && !lhs.node->getResultType()->lowCardinality()) + { + std::move(equals_functions.begin(), equals_functions.end(), std::back_inserter(or_operands)); + continue; + } + + Tuple args; + args.reserve(equals_functions.size()); + /// first we create tuple from RHS of equals functions + for (const auto & equals : equals_functions) + { + const auto * equals_function = equals->as<FunctionNode>(); + assert(equals_function && equals_function->getFunctionName() == "equals"); + const auto * rhs_literal = equals_function->getArguments().getNodes()[1]->as<ConstantNode>(); + assert(rhs_literal); + args.push_back(rhs_literal->getValue()); + } + + auto rhs_node = std::make_shared<ConstantNode>(std::move(args)); + + auto in_function = std::make_shared<FunctionNode>("in"); + + QueryTreeNodes in_arguments; + in_arguments.reserve(2); + in_arguments.push_back(lhs.node); + in_arguments.push_back(std::move(rhs_node)); + + in_function->getArguments().getNodes() = std::move(in_arguments); + in_function->resolveAsFunction(in_function_resolver); + + or_operands.push_back(std::move(in_function)); + } + + if (or_operands.size() == 1) + or_operands.push_back(std::make_shared<ConstantNode>(static_cast<UInt8>(0))); + + auto or_function_resolver = FunctionFactory::instance().get("or", getContext()); + function_node->getArguments().getNodes() = std::move(or_operands); + function_node->resolveAsFunction(or_function_resolver); + } +}; + + +void OrEqualityChainToInPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +{ + OrEqualityChainToInVisitor visitor(std::move(context)); + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/OrEqualityChainToInPass.h new file mode 100644 index 00000000000..86c9c9efd57 --- /dev/null +++ b/src/Analyzer/Passes/OrEqualityChainToInPass.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +namespace DB +{ + +class OrEqualityChainToInPass final : public IQueryTreePass +{ +public: + String getName() override { return "OrEqualityChainToIn"; } + + String getDescription() override { return "Transform all the 'or's with equality check to a single IN function"; } + +
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; +}; + +} diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 588457f90f7..e4bc919e051 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -38,6 +38,7 @@ #include #include #include +#include namespace DB { @@ -145,7 +146,6 @@ private: /** ClickHouse query tree pass manager. * * TODO: Support _shard_num into shardNum() rewriting. - * TODO: Support logical expressions optimizer. * TODO: Support setting convert_query_to_cnf. * TODO: Support setting optimize_using_constraints. * TODO: Support setting optimize_substitute_columns. @@ -262,6 +262,8 @@ void addQueryTreePasses(QueryTreePassManager & manager) manager.addPass(std::make_unique()); + manager.addPass(std::make_unique()); + manager.addPass(std::make_unique()); } From bdad6a480ed157e0727d449de413a7dac2590dab Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 21 Feb 2023 12:56:09 +0000 Subject: [PATCH 059/333] Add tests --- src/Analyzer/Passes/OrEqualityChainToInPass.h | 20 + ...00621_regression_for_in_operator.reference | 44 ++ .../00621_regression_for_in_operator.sql | 4 + .../00736_disjunction_optimisation.reference | 426 ++++++++++++++++-- .../00736_disjunction_optimisation.sql | 24 + ...ssions_optimizer_low_cardinality.reference | 42 ++ ..._expressions_optimizer_low_cardinality.sql | 2 + 7 files changed, 526 insertions(+), 36 deletions(-) diff --git a/src/Analyzer/Passes/OrEqualityChainToInPass.h b/src/Analyzer/Passes/OrEqualityChainToInPass.h index 86c9c9efd57..2a9d6818a4f 100644 --- a/src/Analyzer/Passes/OrEqualityChainToInPass.h +++ b/src/Analyzer/Passes/OrEqualityChainToInPass.h @@ -5,6 +5,26 @@ namespace DB { +/** + * This pass replaces chains of equality functions inside an OR with a single IN operator. + * The replacement is done if: + * - rhs of the equality function is a literal + * - length of chain is at least 'optimize_min_equality_disjunction_chain_length' long OR lhs is LowCardinality + * + * E.g. 
(optimize_min_equality_disjunction_chain_length = 2) + * ------------------------------- + * SELECT * + * FROM table + * WHERE a = 1 OR b = 'test' OR a = 2; + * + * will be transformed into + * + * SELECT * + * FROM TABLE + * WHERE b = 'test' OR a IN (1, 2); + * ------------------------------- + */ + class OrEqualityChainToInPass final : public IQueryTreePass { public: diff --git a/tests/queries/0_stateless/00621_regression_for_in_operator.reference b/tests/queries/0_stateless/00621_regression_for_in_operator.reference index 90f0a70449a..c9eb7a08fc6 100644 --- a/tests/queries/0_stateless/00621_regression_for_in_operator.reference +++ b/tests/queries/0_stateless/00621_regression_for_in_operator.reference @@ -3,3 +3,47 @@ 2 2 2 +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE id: 3, table_name: default.regression_for_in_operator_view + WHERE + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: g, result_type: String, source_id: 3 + CONSTANT id: 9, constant_value: Tuple_(\'5\', \'6\'), constant_value_type: Tuple(String, String) + CONSTANT id: 10, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +2 +2 +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE id: 3, table_name: default.regression_for_in_operator_view + WHERE + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: g, result_type: String, source_id: 3 + CONSTANT id: 9, constant_value: \'5\', constant_value_type: String + FUNCTION id: 10, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 8, column_name: g, result_type: String, source_id: 3 + CONSTANT id: 12, constant_value: \'6\', constant_value_type: String + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/00621_regression_for_in_operator.sql b/tests/queries/0_stateless/00621_regression_for_in_operator.sql index 273f930a90f..db1bcb4a39a 100644 --- a/tests/queries/0_stateless/00621_regression_for_in_operator.sql +++ b/tests/queries/0_stateless/00621_regression_for_in_operator.sql @@ -12,9 +12,13 @@ SELECT count() FROM regression_for_in_operator_view WHERE g IN ('5','6'); SET optimize_min_equality_disjunction_chain_length = 1; SELECT count() FROM regression_for_in_operator_view WHERE g = '5' OR g = '6'; +SELECT count() FROM regression_for_in_operator_view WHERE g = '5' OR g = '6' SETTINGS allow_experimental_analyzer = 1; +EXPLAIN QUERY TREE SELECT count() FROM regression_for_in_operator_view WHERE g = '5' OR g = '6' SETTINGS allow_experimental_analyzer = 1; SET optimize_min_equality_disjunction_chain_length = 3; SELECT count() FROM regression_for_in_operator_view WHERE g = '5' OR g = '6'; +SELECT count() FROM regression_for_in_operator_view WHERE g = '5' OR g = '6' SETTINGS allow_experimental_analyzer = 1; +EXPLAIN QUERY TREE SELECT count() FROM 
regression_for_in_operator_view WHERE g = '5' OR g = '6' SETTINGS allow_experimental_analyzer = 1; DROP TABLE regression_for_in_operator_view; DROP TABLE regression_for_in_operator; diff --git a/tests/queries/0_stateless/00736_disjunction_optimisation.reference b/tests/queries/0_stateless/00736_disjunction_optimisation.reference index afd698b425e..4210571842f 100644 --- a/tests/queries/0_stateless/00736_disjunction_optimisation.reference +++ b/tests/queries/0_stateless/00736_disjunction_optimisation.reference @@ -25,6 +25,97 @@ 3 21 3 22 3 23 +QUERY id: 0 + PROJECTION COLUMNS + k UInt64 + s UInt64 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: s, result_type: UInt64, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.bug + WHERE + FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + FUNCTION id: 7, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + FUNCTION id: 9, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 2 + COLUMN id: 11, column_name: k, result_type: UInt64, source_id: 3 + CONSTANT id: 12, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 13, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 14, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + FUNCTION id: 16, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 18, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 19, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 20, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +1 21 +1 22 +1 23 +2 21 +2 22 +2 23 +3 21 +3 22 +3 23 +1 21 +1 22 +1 23 +2 21 +2 22 +2 23 +3 21 +3 22 +3 23 +QUERY id: 0 + PROJECTION COLUMNS + k UInt64 + s UInt64 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: s, result_type: UInt64, source_id: 3 + JOIN TREE + QUERY id: 3, is_subquery: 1 + PROJECTION COLUMNS + k UInt64 + s UInt64 + PROJECTION + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: k, result_type: UInt64, source_id: 7 + COLUMN id: 8, column_name: s, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, table_name: default.bug + WHERE + FUNCTION id: 9, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 2 + FUNCTION id: 11, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: k, result_type: UInt64, source_id: 7 + CONSTANT id: 14, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 15, constant_value: UInt64_0, constant_value_type: UInt8 + WHERE + FUNCTION id: 16, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 21, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, 
UInt8) + CONSTANT id: 22, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 1 1 21 1 1 1 1 1 22 0 1 1 1 1 23 0 0 1 @@ -34,42 +125,6 @@ 3 1 21 1 1 1 3 1 22 0 1 1 3 1 23 0 0 1 -21 1 -22 1 -23 1 -21 1 -22 1 -23 1 -21 1 -22 1 -23 1 -1 21 -1 22 -1 23 -2 21 -2 22 -2 23 -3 21 -3 22 -3 23 -1 21 -1 22 -1 23 -2 21 -2 22 -2 23 -3 21 -3 22 -3 23 -1 21 -1 22 -1 23 -2 21 -2 22 -2 23 -3 21 -3 22 -3 23 1 1 21 1 1 1 1 1 22 0 1 1 1 1 23 0 0 1 @@ -79,6 +134,53 @@ 3 1 21 1 1 1 3 1 22 0 1 1 3 1 23 0 0 1 +QUERY id: 0 + PROJECTION COLUMNS + k UInt64 + or(equals(k, 1), equals(k, 2), equals(k, 3)) UInt8 + s UInt64 + equals(s, 21) UInt8 + or(equals(s, 21), equals(s, 22)) UInt8 + or(equals(s, 21), equals(s, 22), equals(s, 23)) UInt8 + PROJECTION + LIST id: 1, nodes: 6 + COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 + FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 13, constant_value: UInt64_21, constant_value_type: UInt8 + FUNCTION id: 14, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + FUNCTION id: 16, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 18, constant_value: Tuple_(UInt64_21, UInt64_22), constant_value_type: Tuple(UInt8, UInt8) + CONSTANT id: 19, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 20, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 21, nodes: 2 + FUNCTION id: 22, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 24, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 25, constant_value: UInt64_0, constant_value_type: UInt8 + JOIN TREE + TABLE id: 3, table_name: default.bug + SETTINGS allow_experimental_analyzer=1 21 1 22 1 23 1 @@ -88,3 +190,255 @@ 21 1 22 1 23 1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +QUERY id: 0 + PROJECTION COLUMNS + s UInt64 + or(equals(s, 21), equals(s, 22), equals(s, 23)) UInt8 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + 
JOIN TREE + TABLE id: 3, table_name: default.bug + SETTINGS allow_experimental_analyzer=1 +1 21 +1 22 +1 23 +2 21 +2 22 +2 23 +3 21 +3 22 +3 23 +1 21 +1 22 +1 23 +2 21 +2 22 +2 23 +3 21 +3 22 +3 23 +1 21 +1 22 +1 23 +2 21 +2 22 +2 23 +3 21 +3 22 +3 23 +QUERY id: 0 + PROJECTION COLUMNS + k UInt64 + s UInt64 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: s, result_type: UInt64, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.bug + WHERE + FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + FUNCTION id: 7, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + FUNCTION id: 9, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 2 + COLUMN id: 11, column_name: k, result_type: UInt64, source_id: 3 + CONSTANT id: 12, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 13, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 14, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + FUNCTION id: 16, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 18, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 19, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 20, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +1 21 +1 22 +1 23 +2 21 +2 22 +2 23 +3 21 +3 22 +3 23 +1 21 +1 22 +1 23 +2 21 +2 22 +2 23 +3 21 +3 22 +3 23 +QUERY id: 0 + PROJECTION COLUMNS + k UInt64 + s UInt64 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: s, result_type: UInt64, source_id: 3 + JOIN TREE + QUERY id: 3, is_subquery: 1 + PROJECTION COLUMNS + k UInt64 + s UInt64 + PROJECTION + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: k, result_type: UInt64, source_id: 7 + COLUMN id: 8, column_name: s, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, table_name: default.bug + WHERE + FUNCTION id: 9, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 2 + FUNCTION id: 11, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: k, result_type: UInt64, source_id: 7 + CONSTANT id: 14, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 15, constant_value: UInt64_0, constant_value_type: UInt8 + WHERE + FUNCTION id: 16, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + FUNCTION id: 18, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 21, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 22, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +1 1 21 1 1 1 +1 1 22 0 1 1 +1 1 23 0 0 1 +2 1 21 1 1 1 +2 1 22 0 1 1 +2 1 23 0 0 1 +3 1 21 1 1 1 +3 1 22 0 1 1 +3 1 23 0 0 1 +1 1 21 1 1 1 +1 1 22 0 1 1 +1 1 23 0 0 1 +2 1 21 1 1 1 
+2 1 22 0 1 1 +2 1 23 0 0 1 +3 1 21 1 1 1 +3 1 22 0 1 1 +3 1 23 0 0 1 +QUERY id: 0 + PROJECTION COLUMNS + k UInt64 + or(equals(k, 1), equals(k, 2), equals(k, 3)) UInt8 + s UInt64 + equals(s, 21) UInt8 + or(equals(s, 21), equals(s, 22)) UInt8 + or(equals(s, 21), equals(s, 22), equals(s, 23)) UInt8 + PROJECTION + LIST id: 1, nodes: 6 + COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 + FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 13, constant_value: UInt64_21, constant_value_type: UInt8 + FUNCTION id: 14, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 15, nodes: 2 + FUNCTION id: 16, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_21, constant_value_type: UInt8 + FUNCTION id: 19, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 21, constant_value: UInt64_22, constant_value_type: UInt8 + FUNCTION id: 22, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 25, nodes: 2 + COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 26, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 27, constant_value: UInt64_0, constant_value_type: UInt8 + JOIN TREE + TABLE id: 3, table_name: default.bug + SETTINGS allow_experimental_analyzer=1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +QUERY id: 0 + PROJECTION COLUMNS + s UInt64 + or(equals(s, 21), equals(s, 22), equals(s, 23)) UInt8 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + JOIN TREE + TABLE id: 3, table_name: default.bug + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/00736_disjunction_optimisation.sql b/tests/queries/0_stateless/00736_disjunction_optimisation.sql index 700221ef7f0..e52ed2ab45b 100644 --- 
a/tests/queries/0_stateless/00736_disjunction_optimisation.sql +++ b/tests/queries/0_stateless/00736_disjunction_optimisation.sql @@ -5,17 +5,41 @@ insert into bug values(1,21),(1,22),(1,23),(2,21),(2,22),(2,23),(3,21),(3,22),(3 set optimize_min_equality_disjunction_chain_length = 2; select * from bug; + select * from bug where (k =1 or k=2 or k =3) and (s=21 or s=22 or s=23); +select * from bug where (k =1 or k=2 or k =3) and (s=21 or s=22 or s=23) SETTINGS allow_experimental_analyzer = 1; +explain query tree select * from bug where (k =1 or k=2 or k =3) and (s=21 or s=22 or s=23) SETTINGS allow_experimental_analyzer = 1;; + select * from (select * from bug where k=1 or k=2 or k=3) where (s=21 or s=22 or s=23); +select * from (select * from bug where k=1 or k=2 or k=3) where (s=21 or s=22 or s=23) SETTINGS allow_experimental_analyzer = 1;; +explain query tree select * from (select * from bug where k=1 or k=2 or k=3) where (s=21 or s=22 or s=23) SETTINGS allow_experimental_analyzer = 1;; + select k, (k=1 or k=2 or k=3), s, (s=21), (s=21 or s=22), (s=21 or s=22 or s=23) from bug; +select k, (k=1 or k=2 or k=3), s, (s=21), (s=21 or s=22), (s=21 or s=22 or s=23) from bug SETTINGS allow_experimental_analyzer = 1;; +explain query tree select k, (k=1 or k=2 or k=3), s, (s=21), (s=21 or s=22), (s=21 or s=22 or s=23) from bug SETTINGS allow_experimental_analyzer = 1;; + select s, (s=21 or s=22 or s=23) from bug; +select s, (s=21 or s=22 or s=23) from bug SETTINGS allow_experimental_analyzer = 1;; +explain query tree select s, (s=21 or s=22 or s=23) from bug SETTINGS allow_experimental_analyzer = 1;; set optimize_min_equality_disjunction_chain_length = 3; select * from bug; + select * from bug where (k =1 or k=2 or k =3) and (s=21 or s=22 or s=23); +select * from bug where (k =1 or k=2 or k =3) and (s=21 or s=22 or s=23) SETTINGS allow_experimental_analyzer = 1; +explain query tree select * from bug where (k =1 or k=2 or k =3) and (s=21 or s=22 or s=23) SETTINGS allow_experimental_analyzer = 1;; + select * from (select * from bug where k=1 or k=2 or k=3) where (s=21 or s=22 or s=23); +select * from (select * from bug where k=1 or k=2 or k=3) where (s=21 or s=22 or s=23) SETTINGS allow_experimental_analyzer = 1;; +explain query tree select * from (select * from bug where k=1 or k=2 or k=3) where (s=21 or s=22 or s=23) SETTINGS allow_experimental_analyzer = 1;; + select k, (k=1 or k=2 or k=3), s, (s=21), (s=21 or s=22), (s=21 or s=22 or s=23) from bug; +select k, (k=1 or k=2 or k=3), s, (s=21), (s=21 or s=22), (s=21 or s=22 or s=23) from bug SETTINGS allow_experimental_analyzer = 1;; +explain query tree select k, (k=1 or k=2 or k=3), s, (s=21), (s=21 or s=22), (s=21 or s=22 or s=23) from bug SETTINGS allow_experimental_analyzer = 1;; + select s, (s=21 or s=22 or s=23) from bug; +select s, (s=21 or s=22 or s=23) from bug SETTINGS allow_experimental_analyzer = 1;; +explain query tree select s, (s=21 or s=22 or s=23) from bug SETTINGS allow_experimental_analyzer = 1;; DROP TABLE bug; diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference index dcfcac737c3..086c0a49b59 100644 --- a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference @@ -1,6 +1,48 @@ SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a IN (\'x\', \'y\') +QUERY 
id: 0 + PROJECTION COLUMNS + a LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality + WHERE + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) + CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE (b = 0) OR (b = 1) +QUERY id: 0 + PROJECTION COLUMNS + a LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality + WHERE + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: b, result_type: UInt32, source_id: 3 + CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 10, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 8, column_name: b, result_type: UInt32, source_id: 3 + CONSTANT id: 12, constant_value: UInt64_1, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql index be355a05675..f5368679f60 100644 --- a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql @@ -4,7 +4,9 @@ CREATE TABLE t_logical_expressions_optimizer_low_cardinality (a LowCardinality(S -- LowCardinality case, ignore optimize_min_equality_disjunction_chain_length limit, optimzer applied EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR a = 'y'; +EXPLAIN QUERY TREE SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR a = 'y' SETTINGS allow_experimental_analyzer = 1; -- Non-LowCardinality case, optimizer not applied for short chains EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE b = 0 OR b = 1; +EXPLAIN QUERY TREE SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE b = 0 OR b = 1 SETTINGS allow_experimental_analyzer = 1; DROP TABLE t_logical_expressions_optimizer_low_cardinality; From b66a5bdcf0366b751ca5ad1ecbf00cf6e80ddd6a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 21 Feb 2023 15:13:29 +0000 Subject: [PATCH 060/333] fix mutations with sparse columns --- src/DataTypes/DataTypeTuple.cpp | 14 +++ src/DataTypes/DataTypeTuple.h | 1 + src/DataTypes/IDataType.cpp | 9 ++ src/DataTypes/IDataType.h | 1 + .../Serializations/SerializationInfo.h | 1 + .../Serializations/SerializationInfoTuple.cpp | 13 +++ .../Serializations/SerializationInfoTuple.h | 2 + 
src/Storages/AlterCommands.cpp | 7 +- .../MergedColumnOnlyOutputStream.cpp | 1 - src/Storages/MergeTree/MutateTask.cpp | 48 +++++++++-- ...02662_sparse_columns_mutations_1.reference | 13 +++ .../02662_sparse_columns_mutations_1.sql | 49 +++++++++++ ...02662_sparse_columns_mutations_2.reference | 6 ++ .../02662_sparse_columns_mutations_2.sql | 33 +++++++ ...02662_sparse_columns_mutations_3.reference | 11 +++ .../02662_sparse_columns_mutations_3.sql | 85 +++++++++++++++++++ 16 files changed, 280 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/02662_sparse_columns_mutations_1.reference create mode 100644 tests/queries/0_stateless/02662_sparse_columns_mutations_1.sql create mode 100644 tests/queries/0_stateless/02662_sparse_columns_mutations_2.reference create mode 100644 tests/queries/0_stateless/02662_sparse_columns_mutations_2.sql create mode 100644 tests/queries/0_stateless/02662_sparse_columns_mutations_3.reference create mode 100644 tests/queries/0_stateless/02662_sparse_columns_mutations_3.sql diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 768f87fe3d4..1e28af3ee54 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -312,6 +312,20 @@ MutableSerializationInfoPtr DataTypeTuple::createSerializationInfo(const Seriali return std::make_shared(std::move(infos), names, settings); } +MutableSerializationInfoPtr DataTypeTuple::cloneSerializationInfo(const SerializationInfo & old_info, const SerializationInfo::Settings & settings) const +{ + const auto & old_info_tuple = assert_cast(old_info); + assert(old_info_tuple.getNumElements() == elems.size()); + + MutableSerializationInfos infos; + infos.reserve(elems.size()); + for (size_t i = 0; i < elems.size(); ++i) + infos.push_back(elems[i]->cloneSerializationInfo(*old_info_tuple.getElementInfo(i), settings)); + + return std::make_shared(std::move(infos), names, settings); +} + + SerializationInfoPtr DataTypeTuple::getSerializationInfo(const IColumn & column) const { if (const auto * column_const = checkAndGetColumn(&column)) diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index 152f21015f5..9f3860f78db 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -57,6 +57,7 @@ public: SerializationPtr doGetDefaultSerialization() const override; SerializationPtr getSerialization(const SerializationInfo & info) const override; MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const override; + MutableSerializationInfoPtr cloneSerializationInfo(const SerializationInfo & old_info, const SerializationInfo::Settings & settings) const override; SerializationInfoPtr getSerializationInfo(const IColumn & column) const override; const DataTypePtr & getElement(size_t i) const { return elems[i]; } diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index e0612fbbf36..5cd0b6f659c 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -191,6 +191,15 @@ MutableSerializationInfoPtr IDataType::createSerializationInfo(const Serializati return std::make_shared(ISerialization::Kind::DEFAULT, settings); } +MutableSerializationInfoPtr IDataType::cloneSerializationInfo(const SerializationInfo & old_info, const SerializationInfo::Settings & settings) const +{ + auto new_kind = old_info.getKind(); + if (new_kind == ISerialization::Kind::SPARSE && !supportsSparseSerialization()) + new_kind = ISerialization::Kind::DEFAULT; + + return 
std::make_shared(new_kind, settings); +} + SerializationInfoPtr IDataType::getSerializationInfo(const IColumn & column) const { if (const auto * column_const = checkAndGetColumn(&column)) diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index bafe03dbc3a..32e3b53fc01 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -104,6 +104,7 @@ public: Names getSubcolumnNames() const; virtual MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const; + virtual MutableSerializationInfoPtr cloneSerializationInfo(const SerializationInfo & old_info, const SerializationInfo::Settings & settings) const; virtual SerializationInfoPtr getSerializationInfo(const IColumn & column) const; /// TODO: support more types. diff --git a/src/DataTypes/Serializations/SerializationInfo.h b/src/DataTypes/Serializations/SerializationInfo.h index a4a5685253f..4ae5cf8c193 100644 --- a/src/DataTypes/Serializations/SerializationInfo.h +++ b/src/DataTypes/Serializations/SerializationInfo.h @@ -51,6 +51,7 @@ public: virtual ~SerializationInfo() = default; virtual bool hasCustomSerialization() const { return kind != ISerialization::Kind::DEFAULT; } + virtual bool structureEquals(const SerializationInfo & rhs) const { return typeid(SerializationInfo) == typeid(rhs); } virtual void add(const IColumn & column); virtual void add(const SerializationInfo & other); diff --git a/src/DataTypes/Serializations/SerializationInfoTuple.cpp b/src/DataTypes/Serializations/SerializationInfoTuple.cpp index 6c326743e8a..1a9639a1566 100644 --- a/src/DataTypes/Serializations/SerializationInfoTuple.cpp +++ b/src/DataTypes/Serializations/SerializationInfoTuple.cpp @@ -28,6 +28,19 @@ bool SerializationInfoTuple::hasCustomSerialization() const return std::any_of(elems.begin(), elems.end(), [](const auto & elem) { return elem->hasCustomSerialization(); }); } +bool SerializationInfoTuple::structureEquals(const SerializationInfo & rhs) const +{ + const auto * rhs_tuple = typeid_cast(&rhs); + if (!rhs_tuple || elems.size() != rhs_tuple->elems.size()) + return false; + + for (size_t i = 0; i < elems.size(); ++i) + if (!elems[i]->structureEquals(*rhs_tuple->elems[i])) + return false; + + return true; +} + void SerializationInfoTuple::add(const IColumn & column) { SerializationInfo::add(column); diff --git a/src/DataTypes/Serializations/SerializationInfoTuple.h b/src/DataTypes/Serializations/SerializationInfoTuple.h index b01c629d2ff..ef288948b9a 100644 --- a/src/DataTypes/Serializations/SerializationInfoTuple.h +++ b/src/DataTypes/Serializations/SerializationInfoTuple.h @@ -11,6 +11,7 @@ public: SerializationInfoTuple(MutableSerializationInfos elems_, Names names_, const Settings & settings_); bool hasCustomSerialization() const override; + bool structureEquals(const SerializationInfo & rhs) const override; void add(const IColumn & column) override; void add(const SerializationInfo & other) override; @@ -25,6 +26,7 @@ public: Poco::JSON::Object toJSON() const override; void fromJSON(const Poco::JSON::Object & object) override; + size_t getNumElements() const { return elems.size(); } const MutableSerializationInfoPtr & getElementInfo(size_t i) const { return elems[i]; } ISerialization::Kind getElementKind(size_t i) const { return elems[i]->getKind(); } diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index da11a87eb4d..aff17465466 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -719,7 +719,7 @@ bool 
isMetadataOnlyConversion(const IDataType * from, const IDataType * to) { typeid(DataTypeUInt16), typeid(DataTypeDate) }, }; - /// Unwrap some nested and check for valid conevrsions + /// Unwrap some nested and check for valid conversions while (true) { /// types are equal, obviously pure metadata alter @@ -749,10 +749,9 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to) const auto * nullable_from = typeid_cast(from); const auto * nullable_to = typeid_cast(to); - if (nullable_to) + if (nullable_from && nullable_to) { - /// Here we allow a conversion X -> Nullable(X) to make a metadata-only conversion. - from = nullable_from ? nullable_from->getNestedType().get() : from; + from = nullable_from->getNestedType().get(); to = nullable_to->getNestedType().get(); continue; } diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 03829f1daf9..f6fc40884a1 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -86,7 +86,6 @@ MergedColumnOnlyOutputStream::fillChecksums( } new_part->setColumns(columns, serialization_infos); - return checksums; } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 4b9265488e7..8d95a76b5da 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -203,20 +203,50 @@ getColumnsForNewDataPart( } SerializationInfoByName new_serialization_infos; - for (const auto & [name, info] : serialization_infos) + for (const auto & [name, old_info] : serialization_infos) { if (removed_columns.contains(name)) continue; auto it = renamed_columns_from_to.find(name); - if (it != renamed_columns_from_to.end()) - new_serialization_infos.emplace(it->second, info); - else - new_serialization_infos.emplace(name, info); + auto new_name = it == renamed_columns_from_to.end() ? 
name : it->second; + + if (!updated_header.has(new_name)) + { + new_serialization_infos.emplace(new_name, old_info); + continue; + } + + auto old_type = part_columns.getPhysical(name); + auto new_type = updated_header.getByName(new_name).type; + + if (!new_type->supportsSparseSerialization()) + continue; + + SerializationInfo::Settings settings + { + .ratio_of_defaults_for_sparse = source_part->storage.getSettings()->ratio_of_defaults_for_sparse_serialization, + .choose_kind = false + }; + + auto new_info = new_type->createSerializationInfo(settings); + if (!old_info->structureEquals(*new_info)) + { + new_serialization_infos.emplace(new_name, std::move(new_info)); + continue; + } + + if (!old_info->hasCustomSerialization()) + { + new_serialization_infos.emplace(new_name, old_info); + continue; + } + + new_info = new_type->cloneSerializationInfo(*old_info, settings); + new_serialization_infos.emplace(new_name, std::move(new_info)); } - /// In compact parts we read all columns, because they all stored in a - /// single file + /// In compact parts we read all columns, because they all stored in a single file if (!isWidePart(source_part) || !isFullPartStorage(source_part->getDataPartStorage())) return {updated_header.getNamesAndTypesList(), new_serialization_infos}; @@ -1221,8 +1251,8 @@ private: skip_part_indices, ctx->compression_codec, ctx->txn, - false, - false, + /*reset_columns=*/ true, + /*blocks_are_granules_size=*/ false, ctx->context->getWriteSettings()); ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_1.reference b/tests/queries/0_stateless/02662_sparse_columns_mutations_1.reference new file mode 100644 index 00000000000..3f5c8b6ed1f --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_1.reference @@ -0,0 +1,13 @@ +1_1_1_0 String Sparse +477 ['','foo'] +1_1_1_0_2 Nullable(String) Default +477 ['','foo'] +1_1_1_0_2 Nullable(String) Default +2_3_3_0 Nullable(String) Default +954 ['','foo'] +1_1_1_0_4 String Default +2_3_3_0_4 String Default +954 ['','foo'] +1_1_1_1_4 String Sparse +2_3_3_1_4 String Sparse +954 ['','foo'] diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_1.sql b/tests/queries/0_stateless/02662_sparse_columns_mutations_1.sql new file mode 100644 index 00000000000..3bf37e8e62b --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_1.sql @@ -0,0 +1,49 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_sparse_mutations_1; + +CREATE TABLE t_sparse_mutations_1 (key UInt8, id UInt64, s String) +ENGINE = MergeTree ORDER BY id PARTITION BY key +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_mutations_1 SELECT 1, number, if (number % 21 = 0, 'foo', '') FROM numbers (10000); + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_1' AND column = 's' AND active +ORDER BY name; + +SELECT countIf(s = 'foo'), arraySort(groupUniqArray(s)) FROM t_sparse_mutations_1; + +ALTER TABLE t_sparse_mutations_1 MODIFY COLUMN s Nullable(String); + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_1' AND column = 's' AND active +ORDER BY name; + +SELECT countIf(s = 'foo'), arraySort(groupUniqArray(s)) FROM t_sparse_mutations_1; + +INSERT INTO t_sparse_mutations_1 SELECT 2, number, if (number % 21 = 0, 'foo', '') FROM numbers (10000); + 
+SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_1' AND column = 's' AND active +ORDER BY name; + +SELECT countIf(s = 'foo'), arraySort(groupUniqArray(s)) FROM t_sparse_mutations_1; + +ALTER TABLE t_sparse_mutations_1 MODIFY COLUMN s String; + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_1' AND column = 's' AND active +ORDER BY name; + +SELECT countIf(s = 'foo'), arraySort(groupUniqArray(s)) FROM t_sparse_mutations_1; + +OPTIMIZE TABLE t_sparse_mutations_1 FINAL; + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_1' AND column = 's' AND active +ORDER BY name; + +SELECT countIf(s = 'foo'), arraySort(groupUniqArray(s)) FROM t_sparse_mutations_1; + +DROP TABLE t_sparse_mutations_1; diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_2.reference b/tests/queries/0_stateless/02662_sparse_columns_mutations_2.reference new file mode 100644 index 00000000000..64eb0119982 --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_2.reference @@ -0,0 +1,6 @@ +String Default +10000 49995000 +String Default +770 3848845 +String Sparse +770 3848845 diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_2.sql b/tests/queries/0_stateless/02662_sparse_columns_mutations_2.sql new file mode 100644 index 00000000000..561bd164200 --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_2.sql @@ -0,0 +1,33 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_sparse_mutations_2; + +CREATE TABLE t_sparse_mutations_2 (key UInt8, id UInt64, s String) +ENGINE = MergeTree ORDER BY id PARTITION BY key +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_mutations_2 SELECT 1, number, toString(number) FROM numbers (10000); + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_2' AND column = 's' AND active +ORDER BY name; + +SELECT count(), sum(s::UInt64) FROM t_sparse_mutations_2 WHERE s != ''; + +ALTER TABLE t_sparse_mutations_2 UPDATE s = '' WHERE id % 13 != 0; + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_2' AND column = 's' AND active +ORDER BY name; + +SELECT count(), sum(s::UInt64) FROM t_sparse_mutations_2 WHERE s != ''; + +OPTIMIZE TABLE t_sparse_mutations_2 FINAL; + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_2' AND column = 's' AND active +ORDER BY name; + +SELECT count(), sum(s::UInt64) FROM t_sparse_mutations_2 WHERE s != ''; + +DROP TABLE t_sparse_mutations_2; diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_3.reference b/tests/queries/0_stateless/02662_sparse_columns_mutations_3.reference new file mode 100644 index 00000000000..1501fd27fd5 --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_3.reference @@ -0,0 +1,11 @@ +String Default +Tuple(UInt64, UInt64, String, String, String) Default ['1','2','3','4','5'] ['UInt64','UInt64','String','String','String'] ['Default','Default','Default','Default','Default'] +10000 0 ['1'] ['0'] [''] +Tuple(UInt64, UInt64, String, String, String) Default ['1','2','3','4','5'] ['UInt64','UInt64','String','String','String'] 
['Default','Sparse','Default','Default','Sparse'] +10000 0 ['1'] ['0'] [''] +Tuple(UInt64, UInt64, UInt64, UInt64, String) Default ['1','2','3','4','5'] ['UInt64','UInt64','UInt64','UInt64','String'] ['Default','Sparse','Default','Default','Sparse'] +10000 0 10000 0 [''] +Tuple(UInt64, UInt64, UInt64, UInt64, String) Default ['1','2','3','4','5'] ['UInt64','UInt64','UInt64','UInt64','String'] ['Default','Sparse','Default','Sparse','Sparse'] +10000 0 10000 0 [''] +Tuple(Nullable(UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(String)) Default ['1','1.null','2','2.null','3','3.null','4','4.null','5','5.null'] ['Nullable(UInt64)','UInt8','Nullable(UInt64)','UInt8','Nullable(UInt64)','UInt8','Nullable(UInt64)','UInt8','Nullable(String)','UInt8'] ['Default','Default','Default','Default','Default','Default','Default','Default','Default','Default'] +10000 0 10000 0 [''] diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_3.sql b/tests/queries/0_stateless/02662_sparse_columns_mutations_3.sql new file mode 100644 index 00000000000..6e66336dcbc --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_3.sql @@ -0,0 +1,85 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_sparse_mutations_3; + +CREATE TABLE t_sparse_mutations_3 (key UInt8, id UInt64, s String) +ENGINE = MergeTree ORDER BY id PARTITION BY key +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_mutations_3 SELECT 1, number, toString(tuple(1, 0, '1', '0', '')) FROM numbers (10000); + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +ALTER TABLE t_sparse_mutations_3 MODIFY COLUMN s Tuple(UInt64, UInt64, String, String, String); + +SELECT + type, + serialization_kind, + subcolumns.names, + subcolumns.types, + subcolumns.serializations +FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +SELECT sum(s.1), sum(s.2), groupUniqArray(s.3), groupUniqArray(s.4), groupUniqArray(s.5) FROM t_sparse_mutations_3; + +OPTIMIZE TABLE t_sparse_mutations_3 FINAL; + +SELECT + type, + serialization_kind, + subcolumns.names, + subcolumns.types, + subcolumns.serializations +FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +SELECT sum(s.1), sum(s.2), groupUniqArray(s.3), groupUniqArray(s.4), groupUniqArray(s.5) FROM t_sparse_mutations_3; + +ALTER TABLE t_sparse_mutations_3 MODIFY COLUMN s Tuple(UInt64, UInt64, UInt64, UInt64, String); + +SELECT + type, + serialization_kind, + subcolumns.names, + subcolumns.types, + subcolumns.serializations +FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +SELECT sum(s.1), sum(s.2), sum(s.3), sum(s.4), groupUniqArray(s.5) FROM t_sparse_mutations_3; + +OPTIMIZE TABLE t_sparse_mutations_3 FINAL; + +SELECT + type, + serialization_kind, + subcolumns.names, + subcolumns.types, + subcolumns.serializations +FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +SELECT sum(s.1), sum(s.2), sum(s.3), sum(s.4), groupUniqArray(s.5) FROM t_sparse_mutations_3; + +ALTER TABLE t_sparse_mutations_3 MODIFY COLUMN s Tuple(Nullable(UInt64), Nullable(UInt64), 
Nullable(UInt64), Nullable(UInt64), Nullable(String)); + +SELECT + type, + serialization_kind, + subcolumns.names, + subcolumns.types, + subcolumns.serializations +FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +SELECT sum(s.1), sum(s.2), sum(s.3), sum(s.4), groupUniqArray(s.5) FROM t_sparse_mutations_3; + +DROP TABLE t_sparse_mutations_3; From a110e0f0225acdc2868423b08efc8b23fe1ad697 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 Feb 2023 15:58:56 +0100 Subject: [PATCH 061/333] Allow to ignore errors while pushing to MATERIALIZED VIEW This can be useful in the following scenarios: - you want to duplicate the data to another table and you don't care about the errors - you want to duplicate system.*_log to another server, you are adding a materialized view that will push to a Distributed table, but you don't want to miss original blocks in the local system.*_log - you want to push some data to a 3rd-party service, e.g. using the URL engine. Signed-off-by: Azat Khuzhin --- .../sql-reference/statements/create/view.md | 2 ++ src/Core/Settings.h | 1 + .../Transforms/buildPushingToViewsChain.cpp | 16 ++++++++-- ...materialized_views_ignore_errors.reference | 17 +++++++++++ ...02572_materialized_views_ignore_errors.sql | 30 +++++++++++++++++++ 5 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference create mode 100644 tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 527b31b36a4..35242ec7a93 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -66,6 +66,8 @@ A materialized view is implemented as follows: when inserting data to the table Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views. Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view. + +By default, if an error occurs while pushing to the materialized view, the data will not be inserted into the table to which the materialized view is attached. You can change this by setting `materialized_views_ignore_errors=true` for your `INSERT` query. ::: If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it.
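In short: with this change, an error raised while pushing a block to a materialized view can be logged and ignored instead of failing the `INSERT`. A minimal sketch of the intended usage follows; the table names are hypothetical, and the bundled test 02572_materialized_views_ignore_errors.sql exercises the same flow.

```sql
-- Make the failing push surface synchronously, as the bundled test does.
SET prefer_localhost_replica = 1;

CREATE TABLE data (key Int) ENGINE = Memory;
-- Distributed table whose underlying target table does not exist yet, so pushes to it fail.
CREATE TABLE proxy (key Int) ENGINE = Distributed('test_shard_localhost', currentDatabase(), 'receiver');
CREATE MATERIALIZED VIEW push_to_proxy TO proxy AS SELECT * FROM data;

-- Pushing through the view fails: the INSERT returns an UNKNOWN_TABLE error.
INSERT INTO data VALUES (1);

-- With the new setting the view error is only logged and the INSERT succeeds.
INSERT INTO data SETTINGS materialized_views_ignore_errors = 1 VALUES (2);
```

The follow-up patch below then enables this behaviour unconditionally for the pushes made from the system.*_log tables.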
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3002bcc8b49..ac9a169e2a8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -508,6 +508,7 @@ class IColumn; M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index a58d70a8428..154986045d9 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -173,7 +174,7 @@ class FinalizingViewsTransform final : public IProcessor static InputPorts initPorts(std::vector headers); public: - FinalizingViewsTransform(std::vector headers, ViewsDataPtr data); + FinalizingViewsTransform(std::vector headers, ViewsDataPtr data, bool materialized_views_ignore_errors_); String getName() const override { return "FinalizingViewsTransform"; } Status prepare() override; @@ -184,6 +185,7 @@ private: ViewsDataPtr views_data; std::vector statuses; std::exception_ptr any_exception; + bool materialized_views_ignore_errors; }; @@ -407,7 +409,7 @@ Chain buildPushingToViewsChain( headers.push_back(chain.getOutputHeader()); auto copying_data = std::make_shared(storage_header, views_data); - auto finalizing_views = std::make_shared(std::move(headers), views_data); + auto finalizing_views = std::make_shared(std::move(headers), views_data, settings.materialized_views_ignore_errors); auto out = copying_data->getOutputs().begin(); auto in = finalizing_views->getInputs().begin(); @@ -684,10 +686,11 @@ void PushingToWindowViewSink::consume(Chunk chunk) } -FinalizingViewsTransform::FinalizingViewsTransform(std::vector headers, ViewsDataPtr data) +FinalizingViewsTransform::FinalizingViewsTransform(std::vector headers, ViewsDataPtr data, bool materialized_views_ignore_errors_) : IProcessor(initPorts(std::move(headers)), {Block()}) , output(outputs.front()) , views_data(std::move(data)) + , materialized_views_ignore_errors(materialized_views_ignore_errors_) { statuses.resize(views_data->views.size()); } @@ -788,6 +791,13 @@ void FinalizingViewsTransform::work() auto & status = statuses[i]; ++i; + if (status.exception && materialized_views_ignore_errors) + { + auto exception = addStorageToException(status.exception, view.table_id); + 
tryLogException(exception, &Poco::Logger::get("PushingToViews"), "Cannot push to the storage, ignoring the error"); + continue; + } + if (status.exception) { if (!any_exception) diff --git a/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference new file mode 100644 index 00000000000..537865e10da --- /dev/null +++ b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference @@ -0,0 +1,17 @@ +-- { echoOn } +select * from data_02572 order by key; +insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE } +select * from data_02572 order by key; +1 +insert into data_02572 settings materialized_views_ignore_errors=1 values (2); +select * from data_02572 order by key; +1 +2 +create table receiver_02572 as data_02572; +insert into data_02572 values (3); +select * from data_02572 order by key; +1 +2 +3 +select * from receiver_02572 order by key; +3 diff --git a/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql new file mode 100644 index 00000000000..41fa9e32d1c --- /dev/null +++ b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql @@ -0,0 +1,30 @@ +set prefer_localhost_replica=1; + +drop table if exists data_02572; +drop table if exists proxy_02572; +drop table if exists push_to_proxy_mv_02572; +drop table if exists receiver_02572; + +create table data_02572 (key Int) engine=Memory(); + +create table proxy_02572 (key Int) engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572'); +-- ensure that insert fails +insert into proxy_02572 values (1); -- { serverError UNKNOWN_TABLE } + +-- proxy data with MV +create materialized view push_to_proxy_mv_02572 to proxy_02572 as select * from data_02572; + +-- { echoOn } +select * from data_02572 order by key; + +insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE } +select * from data_02572 order by key; + +insert into data_02572 settings materialized_views_ignore_errors=1 values (2); +select * from data_02572 order by key; + +create table receiver_02572 as data_02572; + +insert into data_02572 values (3); +select * from data_02572 order by key; +select * from receiver_02572 order by key; From 6391e2daecad874e29ae444b24f537a64758de0e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 Feb 2023 16:27:30 +0100 Subject: [PATCH 062/333] Set materialized_views_ignore_errors unconditionally for system.*_log tables Signed-off-by: Azat Khuzhin --- docs/en/sql-reference/statements/create/view.md | 2 ++ src/Interpreters/SystemLog.cpp | 2 ++ ...s_materialized_views_ignore_errors.reference | 0 ...em_logs_materialized_views_ignore_errors.sql | 17 +++++++++++++++++ 4 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference create mode 100644 tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 35242ec7a93..5d54662326a 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -68,6 +68,8 @@ Materialized views in ClickHouse use **column names** instead of column order du Materialized views in ClickHouse are implemented more like insert triggers. 
If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view. By default if the will be an error during pushing to the materialized view the data will not be inserted into the table to which the materialized view is attached. You can change this by setting `materialized_views_ignore_errors=true` setting for your `INSERT` query. + +`materialized_views_ignore_errors` set to `true` by default for `system.*_log` tables. ::: If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it. diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 9d8547abcf2..78513920236 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -426,6 +426,8 @@ void SystemLog::flushImpl(const std::vector & to_flush, // we need query context to do inserts to target table with MV containing subqueries or joins auto insert_context = Context::createCopy(context); insert_context->makeQueryContext(); + /// We always want to deliver the data to the original table regardless of the MVs + insert_context->setSetting("materialized_views_ignore_errors", true); InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql new file mode 100644 index 00000000000..d26f86962c4 --- /dev/null +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql @@ -0,0 +1,17 @@ +-- Tags: no-parallel +-- Tag no-parallel: due to attaching to system.query_log + +-- Attach MV to system.query_log and check that writing query_log will not fail + +drop table if exists log_proxy_02572; +drop table if exists push_to_logs_proxy_mv_02572; + +create table log_proxy_02572 as system.query_log engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572'); +create materialized view push_to_logs_proxy_mv_02572 to log_proxy_02572 as select * from system.query_log; + +set log_queries=1; +system flush logs; +system flush logs; + +drop table log_proxy_02572; +drop table push_to_logs_proxy_mv_02572; From 81435282ec2665ba189511a9427a7ccf6692e963 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 Feb 2023 17:04:58 +0100 Subject: [PATCH 063/333] Add an entry to system.query_views_log regardless of materialized_views_ignore_errors Signed-off-by: Azat Khuzhin --- .../Transforms/buildPushingToViewsChain.cpp | 15 ++++++--------- ...2_materialized_views_ignore_errors.reference | 17 ++++++++++++++--- .../02572_materialized_views_ignore_errors.sql | 16 +++++++++++++--- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 
154986045d9..13c4806590e 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -737,7 +737,7 @@ IProcessor::Status FinalizingViewsTransform::prepare() else statuses[i].exception = data.exception; - if (i == 0 && statuses[0].is_first) + if (i == 0 && statuses[0].is_first && !materialized_views_ignore_errors) { output.pushData(std::move(data)); return Status::PortFull; @@ -754,7 +754,7 @@ IProcessor::Status FinalizingViewsTransform::prepare() if (!statuses.empty()) return Status::Ready; - if (any_exception) + if (any_exception && !materialized_views_ignore_errors) output.pushException(any_exception); output.finish(); @@ -791,19 +791,16 @@ void FinalizingViewsTransform::work() auto & status = statuses[i]; ++i; - if (status.exception && materialized_views_ignore_errors) - { - auto exception = addStorageToException(status.exception, view.table_id); - tryLogException(exception, &Poco::Logger::get("PushingToViews"), "Cannot push to the storage, ignoring the error"); - continue; - } - if (status.exception) { if (!any_exception) any_exception = status.exception; view.setException(addStorageToException(status.exception, view.table_id)); + + /// Exception will be ignored, it is saved here for the system.query_views_log + if (materialized_views_ignore_errors) + tryLogException(view.exception, &Poco::Logger::get("PushingToViews"), "Cannot push to the storage, ignoring the error"); } else { diff --git a/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference index 537865e10da..fc2e6b78122 100644 --- a/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference +++ b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference @@ -1,10 +1,21 @@ -- { echoOn } select * from data_02572 order by key; -insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE } -select * from data_02572 order by key; -1 insert into data_02572 settings materialized_views_ignore_errors=1 values (2); select * from data_02572 order by key; +2 +-- check system.query_views_log +system flush logs; +-- lower(status) to pass through clickhouse-test "exception" check +select lower(status::String), errorCodeToName(exception_code) +from system.query_views_log where + view_name = concatWithSeparator('.', currentDatabase(), 'push_to_proxy_mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'proxy_02572') + order by event_date, event_time +; +exceptionwhileprocessing UNKNOWN_TABLE +-- materialized_views_ignore_errors=0 +insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE } +select * from data_02572 order by key; 1 2 create table receiver_02572 as data_02572; diff --git a/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql index 41fa9e32d1c..2d1f824b9b1 100644 --- a/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql +++ b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql @@ -17,11 +17,21 @@ create materialized view push_to_proxy_mv_02572 to proxy_02572 as select * from -- { echoOn } select * from data_02572 order by key; -insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE } -select * from data_02572 order by key; - insert into data_02572 settings materialized_views_ignore_errors=1 values (2); select * from data_02572 order by key; +-- check 
system.query_views_log +system flush logs; +-- lower(status) to pass through clickhouse-test "exception" check +select lower(status::String), errorCodeToName(exception_code) +from system.query_views_log where + view_name = concatWithSeparator('.', currentDatabase(), 'push_to_proxy_mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'proxy_02572') + order by event_date, event_time +; + +-- materialized_views_ignore_errors=0 +insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE } +select * from data_02572 order by key; create table receiver_02572 as data_02572; From 83b2f6434107be0852b07b466faeb42f77e8496f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 Feb 2023 17:23:26 +0100 Subject: [PATCH 064/333] Improve 02572_system_logs_materialized_views_ignore_errors test Signed-off-by: Azat Khuzhin --- ...tem_logs_materialized_views_ignore_errors.reference | 2 ++ ...72_system_logs_materialized_views_ignore_errors.sql | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference index e69de29bb2d..d5446e756a3 100644 --- a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference @@ -0,0 +1,2 @@ +10 querystart OK +10 queryfinish OK diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql index d26f86962c4..b7362ac1c33 100644 --- a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql @@ -3,15 +3,23 @@ -- Attach MV to system.query_log and check that writing query_log will not fail +set log_queries=1; + drop table if exists log_proxy_02572; drop table if exists push_to_logs_proxy_mv_02572; create table log_proxy_02572 as system.query_log engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572'); create materialized view push_to_logs_proxy_mv_02572 to log_proxy_02572 as select * from system.query_log; -set log_queries=1; +select 1 format Null; system flush logs; system flush logs; drop table log_proxy_02572; drop table push_to_logs_proxy_mv_02572; + +system flush logs; +-- lower() to pass through clickhouse-test "exception" check +select count(), lower(type::String), errorCodeToName(exception_code) + from system.query_log + where current_database = currentDatabase() group by 2, 3; From 4f8da66bb51c0e5f59674b2e2f86dfe49840fe08 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 21 Feb 2023 18:25:10 +0000 Subject: [PATCH 065/333] fix unit test with sparse columns --- src/Columns/ColumnSparse.cpp | 1 + src/Columns/tests/gtest_column_sparse.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 0e408fca467..fbab61c879d 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -456,6 +456,7 @@ void ColumnSparse::compareColumn(const IColumn & rhs, size_t rhs_row_num, nullptr, nested_result, direction, nan_direction_hint); const auto & offsets_data = getOffsetsData(); + compare_results.resize(size()); std::fill(compare_results.begin(), compare_results.end(), nested_result[0]); for (size_t i = 0; i < 
offsets_data.size(); ++i) compare_results[offsets_data[i]] = nested_result[i + 1]; diff --git a/src/Columns/tests/gtest_column_sparse.cpp b/src/Columns/tests/gtest_column_sparse.cpp index b5b226c6862..6062ea51941 100644 --- a/src/Columns/tests/gtest_column_sparse.cpp +++ b/src/Columns/tests/gtest_column_sparse.cpp @@ -327,4 +327,3 @@ TEST(ColumnSparse, GetPermutation) } #undef DUMP_COLUMN -#undef DUMP_NON_DEFAULTS From 3a58951f6446b3aeca13cdc671322f459c4f77a7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 Feb 2023 19:10:43 +0100 Subject: [PATCH 066/333] Improve documentation for materialized_views_ignore_errors Signed-off-by: Azat Khuzhin --- docs/en/sql-reference/statements/create/view.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 5d54662326a..6e412c13291 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -67,9 +67,11 @@ Materialized views in ClickHouse use **column names** instead of column order du Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view. -By default if the will be an error during pushing to the materialized view the data will not be inserted into the table to which the materialized view is attached. You can change this by setting `materialized_views_ignore_errors=true` setting for your `INSERT` query. +Materialized views in ClickHouse do not have deterministic behaviour in case of errors. This means that blocks that had already been written will be preserved in the destination table, but all blocks after the error will not. -`materialized_views_ignore_errors` set to `true` by default for `system.*_log` tables. +By default, if pushing to one of the views fails, then the INSERT query will fail too, and some blocks may not be written to the destination table. This can be changed using the `materialized_views_ignore_errors` setting (set it for the `INSERT` query): with `materialized_views_ignore_errors=true`, any errors while pushing to views are ignored and all blocks are written to the destination table. + +Also note that `materialized_views_ignore_errors` is set to `true` by default for `system.*_log` tables. ::: If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it.
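Since an entry is written to system.query_views_log for a failed push even when the error is ignored (see the query_views_log change earlier in this series), the ignored errors can still be inspected afterwards. A rough sketch, assuming a materialized view named mv in the current database (the view name is illustrative; the columns and functions are the same ones the tests above rely on):

system flush logs;
select view_name, lower(status::String), errorCodeToName(exception_code)
from system.query_views_log
where view_name = concatWithSeparator('.', currentDatabase(), 'mv')
order by event_time;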
From f5c232707d7e948776a67e761bcf65e19d08e05f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 21 Feb 2023 20:04:40 +0100 Subject: [PATCH 067/333] cancel insert queries correctly --- .../PushingAsyncPipelineExecutor.cpp | 4 +- .../Executors/PushingPipelineExecutor.cpp | 4 +- src/Server/GRPCServer.cpp | 5 +- src/Server/TCPHandler.cpp | 15 ++++- src/Storages/StorageReplicatedMergeTree.cpp | 4 +- ...ert_cancellation_native_protocol.reference | 2 + ...434_insert_cancellation_native_protocol.sh | 65 +++++++++++++++++++ 7 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02434_insert_cancellation_native_protocol.reference create mode 100755 tests/queries/0_stateless/02434_insert_cancellation_native_protocol.sh diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 54c1e7bf30f..20f47c6b54c 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -139,9 +139,11 @@ PushingAsyncPipelineExecutor::PushingAsyncPipelineExecutor(QueryPipeline & pipel PushingAsyncPipelineExecutor::~PushingAsyncPipelineExecutor() { + /// It must be finalized explicitly. Otherwise we cancel it assuming it's due to an exception. + chassert(finished || std::uncaught_exceptions()); try { - finish(); + cancel(); } catch (...) { diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp index d9a14704cd0..ca60932ed6f 100644 --- a/src/Processors/Executors/PushingPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingPipelineExecutor.cpp @@ -63,9 +63,11 @@ PushingPipelineExecutor::PushingPipelineExecutor(QueryPipeline & pipeline_) : pi PushingPipelineExecutor::~PushingPipelineExecutor() { + /// It must be finalized explicitly. Otherwise we cancel it assuming it's due to an exception. + chassert(finished || std::uncaught_exceptions()); try { - finish(); + cancel(); } catch (...) 
{ diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 595f5a8c2b7..9a4e43b2fde 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -984,7 +984,10 @@ namespace executor.push(block); } - executor.finish(); + if (isQueryCancelled()) + executor.cancel(); + else + executor.finish(); } void Call::initializePipeline(const Block & header) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index f76c342fa9a..f1340764e00 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -109,6 +109,7 @@ namespace ErrorCodes extern const int UNEXPECTED_PACKET_FROM_CLIENT; extern const int UNKNOWN_PROTOCOL; extern const int AUTHENTICATION_FAILED; + extern const int QUERY_WAS_CANCELLED; } TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_) @@ -658,6 +659,7 @@ bool TCPHandler::readDataNext() { LOG_INFO(log, "Client has dropped the connection, cancel the query."); state.is_connection_closed = true; + state.is_cancelled = true; break; } @@ -701,6 +703,9 @@ void TCPHandler::readData() while (readDataNext()) ; + + if (state.is_cancelled) + throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled"); } @@ -711,6 +716,9 @@ void TCPHandler::skipData() while (readDataNext()) ; + + if (state.is_cancelled) + throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled"); } @@ -747,7 +755,10 @@ void TCPHandler::processInsertQuery() while (readDataNext()) executor.push(std::move(state.block_for_insert)); - executor.finish(); + if (state.is_cancelled) + executor.cancel(); + else + executor.finish(); }; if (num_threads > 1) @@ -1314,6 +1325,8 @@ bool TCPHandler::receivePacket() std::this_thread::sleep_for(ms); } + state.is_cancelled = true; + return false; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index db99007cb4f..b8e889d6182 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -794,7 +794,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr auto code = zookeeper->tryMulti(ops, responses); if (code == Coordination::Error::ZNODEEXISTS) { - LOG_WARNING(log, "It looks like the table {} was created by another server at the same moment, will retry", zookeeper_path); + LOG_INFO(log, "It looks like the table {} was created by another server at the same moment, will retry", zookeeper_path); continue; } else if (code != Coordination::Error::ZOK) @@ -874,7 +874,7 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada case Coordination::Error::ZNODEEXISTS: throw Exception(ErrorCodes::REPLICA_ALREADY_EXISTS, "Replica {} already exists", replica_path); case Coordination::Error::ZBADVERSION: - LOG_ERROR(log, "Retrying createReplica(), because some other replicas were created at the same time"); + LOG_INFO(log, "Retrying createReplica(), because some other replicas were created at the same time"); break; case Coordination::Error::ZNONODE: throw Exception(ErrorCodes::ALL_REPLICAS_LOST, "Table {} was suddenly removed", zookeeper_path); diff --git a/tests/queries/0_stateless/02434_insert_cancellation_native_protocol.reference b/tests/queries/0_stateless/02434_insert_cancellation_native_protocol.reference new file mode 100644 index 00000000000..e3038e03530 --- /dev/null +++ b/tests/queries/0_stateless/02434_insert_cancellation_native_protocol.reference 
@@ -0,0 +1,2 @@ +5000000 +5000000 diff --git a/tests/queries/0_stateless/02434_insert_cancellation_native_protocol.sh b/tests/queries/0_stateless/02434_insert_cancellation_native_protocol.sh new file mode 100755 index 00000000000..4eb02c38c1a --- /dev/null +++ b/tests/queries/0_stateless/02434_insert_cancellation_native_protocol.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# shellcheck disable=SC2009 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +export DATA_FILE="$CLICKHOUSE_TMP/deduptest.tsv" +export TEST_MARK="02434_insert_${CLICKHOUSE_DATABASE}_" + +$CLICKHOUSE_CLIENT -q 'select * from numbers(5000000) format TSV' > $DATA_FILE +$CLICKHOUSE_CLIENT -q 'create table dedup_test(A Int64) Engine = MergeTree order by A settings non_replicated_deduplication_window=1000;' +$CLICKHOUSE_CLIENT --max_block_size=100000 --min_chunk_bytes_for_parallel_parsing=10000 -q 'insert into dedup_test format TSV' < $DATA_FILE +$CLICKHOUSE_CLIENT -q 'select count() from dedup_test' + +function thread_insert +{ + # supress "Killed" messages from bash + function wrap + { + $CLICKHOUSE_CLIENT --max_block_size=100000 --min_chunk_bytes_for_parallel_parsing=10000 --query_id="$ID" -q 'insert into dedup_test format TSV' < $DATA_FILE + } + export -f wrap + while true; do + export ID="$TEST_MARK$RANDOM" + bash -c wrap 2>&1| grep -Fav "Killed" + done +} + +function thread_select +{ + while true; do + $CLICKHOUSE_CLIENT -q "with (select count() from dedup_test) as c select throwIf(c != 5000000, 'Expected 5000000 rows, got ' || toString(c)) format Null" + sleep 0.$RANDOM; + done +} + +function thread_cancel +{ + while true; do + SIGNAL="INT" + if (( RANDOM % 2 )); then + SIGNAL="KILL" + fi + PID=$(ps -ef | grep "$TEST_MARK" | grep -v grep | awk '{print $2}') + if [ ! 
-z "$PID" ]; then kill -s "$SIGNAL" "$PID" || echo "$PID"; fi + sleep 0.$RANDOM; + sleep 0.$RANDOM; + sleep 0.$RANDOM; + done +} + +export -f thread_insert; +export -f thread_select; +export -f thread_cancel; + +TIMEOUT=30 + +timeout $TIMEOUT bash -c thread_insert & +timeout $TIMEOUT bash -c thread_select & +timeout $TIMEOUT bash -c thread_cancel & + +wait + +$CLICKHOUSE_CLIENT -q 'select count() from dedup_test' From 592af6d652b69c8e5b18e2a8578ac76cbfcffe71 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 22 Feb 2023 02:34:03 +0100 Subject: [PATCH 068/333] fix incomplete interst through http --- src/Common/StatusFile.cpp | 2 +- src/Core/MySQL/IMySQLReadPacket.cpp | 4 +- src/IO/LimitReadBuffer.cpp | 24 +++++-- src/IO/LimitReadBuffer.h | 8 ++- src/Interpreters/executeQuery.cpp | 2 +- src/Server/HTTP/HTTPServerRequest.cpp | 15 +++- src/Server/MySQLHandler.cpp | 2 +- src/Server/TCPHandler.cpp | 2 +- src/Storages/StorageLog.cpp | 2 +- .../02435_insert_cancellation_http.reference | 2 + .../02435_insert_cancellation_http.sh | 69 +++++++++++++++++++ 11 files changed, 114 insertions(+), 18 deletions(-) create mode 100644 tests/queries/0_stateless/02435_insert_cancellation_http.reference create mode 100755 tests/queries/0_stateless/02435_insert_cancellation_http.sh diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index d90d50ff96d..e71056fcaa3 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -51,7 +51,7 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_) std::string contents; { ReadBufferFromFile in(path, 1024); - LimitReadBuffer limit_in(in, 1024, false); + LimitReadBuffer limit_in(in, 1024, /* trow_exception */ false, /* exact_limit */ false); readStringUntilEOF(contents, limit_in); } diff --git a/src/Core/MySQL/IMySQLReadPacket.cpp b/src/Core/MySQL/IMySQLReadPacket.cpp index 74f309d0294..c99ae11688b 100644 --- a/src/Core/MySQL/IMySQLReadPacket.cpp +++ b/src/Core/MySQL/IMySQLReadPacket.cpp @@ -33,13 +33,13 @@ void IMySQLReadPacket::readPayloadWithUnpacked(ReadBuffer & in) void LimitedReadPacket::readPayload(ReadBuffer &in, uint8_t &sequence_id) { - LimitReadBuffer limited(in, 10000, true, "too long MySQL packet."); + LimitReadBuffer limited(in, 10000, /* trow_exception */ true, /* exact_limit */ false, "too long MySQL packet."); IMySQLReadPacket::readPayload(limited, sequence_id); } void LimitedReadPacket::readPayloadWithUnpacked(ReadBuffer & in) { - LimitReadBuffer limited(in, 10000, true, "too long MySQL packet."); + LimitReadBuffer limited(in, 10000, /* trow_exception */ true, /* exact_limit */ false, "too long MySQL packet."); IMySQLReadPacket::readPayloadWithUnpacked(limited); } diff --git a/src/IO/LimitReadBuffer.cpp b/src/IO/LimitReadBuffer.cpp index 6b3c383c753..96e2c359b58 100644 --- a/src/IO/LimitReadBuffer.cpp +++ b/src/IO/LimitReadBuffer.cpp @@ -9,6 +9,7 @@ namespace DB namespace ErrorCodes { extern const int LIMIT_EXCEEDED; + extern const int CANNOT_READ_ALL_DATA; } @@ -21,14 +22,22 @@ bool LimitReadBuffer::nextImpl() if (bytes >= limit) { + if (exact_limit && bytes == limit) + return false; + + if (exact_limit && bytes != limit) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected data, got {} bytes, expected {}", bytes, limit); + if (throw_exception) throw Exception(ErrorCodes::LIMIT_EXCEEDED, "Limit for LimitReadBuffer exceeded: {}", exception_message); - else - return false; + + return false; } if (!in->next()) { + if (exact_limit && bytes != limit) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, 
"Unexpected EOF, got {} of {} bytes", bytes, limit); /// Clearing the buffer with existing data. set(in->position(), 0); return false; @@ -43,12 +52,13 @@ bool LimitReadBuffer::nextImpl() } -LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, std::string exception_message_) +LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, bool exact_limit_, std::string exception_message_) : ReadBuffer(in_ ? in_->position() : nullptr, 0) , in(in_) , owns_in(owns) , limit(limit_) , throw_exception(throw_exception_) + , exact_limit(exact_limit_) , exception_message(std::move(exception_message_)) { assert(in); @@ -61,14 +71,14 @@ LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, boo } -LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, std::string exception_message_) - : LimitReadBuffer(&in_, false, limit_, throw_exception_, exception_message_) +LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, bool exact_limit_, std::string exception_message_) + : LimitReadBuffer(&in_, false, limit_, throw_exception_, exact_limit_, exception_message_) { } -LimitReadBuffer::LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, std::string exception_message_) - : LimitReadBuffer(in_.release(), true, limit_, throw_exception_, exception_message_) +LimitReadBuffer::LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, bool exact_limit_, std::string exception_message_) + : LimitReadBuffer(in_.release(), true, limit_, throw_exception_, exact_limit_, exception_message_) { } diff --git a/src/IO/LimitReadBuffer.h b/src/IO/LimitReadBuffer.h index 92e5fbb0aa6..0f99bf2a08d 100644 --- a/src/IO/LimitReadBuffer.h +++ b/src/IO/LimitReadBuffer.h @@ -13,8 +13,9 @@ namespace DB class LimitReadBuffer : public ReadBuffer { public: - LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, std::string exception_message_ = {}); - LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, std::string exception_message_ = {}); + LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, bool exact_limit_, std::string exception_message_ = {}); + LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, bool exact_limit_, + std::string exception_message_ = {}); ~LimitReadBuffer() override; private: @@ -23,9 +24,10 @@ private: UInt64 limit; bool throw_exception; + bool exact_limit; std::string exception_message; - LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, std::string exception_message_); + LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, bool exact_limit_, std::string exception_message_); bool nextImpl() override; }; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 3bb46462353..aeaacf35051 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1237,7 +1237,7 @@ void executeQuery( /// If not - copy enough data into 'parse_buf'. 
WriteBufferFromVector> out(parse_buf); - LimitReadBuffer limit(istr, max_query_size + 1, false); + LimitReadBuffer limit(istr, max_query_size + 1, /* trow_exception */ false, /* exact_limit */ false); copyData(limit, out); out.finalize(); diff --git a/src/Server/HTTP/HTTPServerRequest.cpp b/src/Server/HTTP/HTTPServerRequest.cpp index a82eb95aee1..d2e19d594dc 100644 --- a/src/Server/HTTP/HTTPServerRequest.cpp +++ b/src/Server/HTTP/HTTPServerRequest.cpp @@ -12,6 +12,8 @@ #include #include +#include + #if USE_SSL #include #include @@ -44,12 +46,23 @@ HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse readRequest(*in); /// Try parse according to RFC7230 + /// If a client crashes, most systems will gracefully terminate the connection with FIN just like it's done on close(). + /// So we will get 0 from recv(...) and will not be able to understand that something went wrong (well, we probably + /// will get RST later on attempt to write to the socket that closed on the other side, but it will happen when the query is finished). + /// If we are extremely unlucky and data format is TSV, for example, then we may stop parsing exactly between rows + /// and decide that it's EOF (but it is not). It may break deduplication, because clients cannot control it + /// and retry with exactly the same (incomplete) set of rows. + /// That's why we have to check body size if it's provided. if (getChunkedTransferEncoding()) stream = std::make_unique(std::move(in), context->getMaxChunkSize()); else if (hasContentLength()) - stream = std::make_unique(std::move(in), getContentLength(), false); + stream = std::make_unique(std::move(in), getContentLength(), /* trow_exception */ true, /* exact_limit */ true); else if (getMethod() != HTTPRequest::HTTP_GET && getMethod() != HTTPRequest::HTTP_HEAD && getMethod() != HTTPRequest::HTTP_DELETE) + { stream = std::move(in); + LOG_WARNING(&Poco::Logger::get("HTTPServerRequest"), "Got an HTTP request with no content length, " + "it may be impossible to distinguish graceful EOF from abnormal connection loss"); + } else /// We have to distinguish empty buffer and nullptr. stream = std::make_unique(); diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 3715d658730..279896da9fa 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -155,7 +155,7 @@ void MySQLHandler::run() payload.readStrict(command); // For commands which are executed without MemoryTracker. - LimitReadBuffer limited_payload(payload, 10000, true, "too long MySQL packet."); + LimitReadBuffer limited_payload(payload, 10000, /* trow_exception */ true, /* exact_limit */ false, "too long MySQL packet."); LOG_DEBUG(log, "Received command: {}. Connection id: {}.", static_cast(static_cast(command)), connection_id); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index f1340764e00..3e89d1810c7 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1057,7 +1057,7 @@ bool TCPHandler::receiveProxyHeader() /// Only PROXYv1 is supported. /// Validation of protocol is not fully performed. - LimitReadBuffer limit_in(*in, 107, true); /// Maximum length from the specs. + LimitReadBuffer limit_in(*in, 107, /* trow_exception */ true, /* exact_limit */ false); /// Maximum length from the specs. 
assertString("PROXY ", limit_in); diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index f2eca42ed0b..388faf1aeb5 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -118,7 +118,7 @@ private: if (limited_by_file_size) { - limited.emplace(*plain, file_size - offset, false); + limited.emplace(*plain, file_size - offset, /* trow_exception */ false, /* exact_limit */ false); compressed.emplace(*limited); } else diff --git a/tests/queries/0_stateless/02435_insert_cancellation_http.reference b/tests/queries/0_stateless/02435_insert_cancellation_http.reference new file mode 100644 index 00000000000..e3038e03530 --- /dev/null +++ b/tests/queries/0_stateless/02435_insert_cancellation_http.reference @@ -0,0 +1,2 @@ +5000000 +5000000 diff --git a/tests/queries/0_stateless/02435_insert_cancellation_http.sh b/tests/queries/0_stateless/02435_insert_cancellation_http.sh new file mode 100755 index 00000000000..653ec59f491 --- /dev/null +++ b/tests/queries/0_stateless/02435_insert_cancellation_http.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# shellcheck disable=SC2009 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +export DATA_FILE="$CLICKHOUSE_TMP/deduptest.tsv" +export TEST_MARK="02435_insert_${CLICKHOUSE_DATABASE}_" + +$CLICKHOUSE_CLIENT -q 'select * from numbers(5000000) format TSV' > $DATA_FILE +$CLICKHOUSE_CLIENT -q 'create table dedup_test(A Int64) Engine = MergeTree order by A settings non_replicated_deduplication_window=1000;' +$CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&max_insert_block_size=100000&input_format_parallel_parsing=0&query=insert+into+dedup_test+format+TSV" < $DATA_FILE +$CLICKHOUSE_CLIENT -q 'select count() from dedup_test' + +function thread_insert +{ + # supress "Killed" messages from bash + function wrap + { + if (( RANDOM % 2 )); then + $CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&query_id=$ID&max_insert_block_size=100000&input_format_parallel_parsing=0&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + else + $CLICKHOUSE_CURL -sS -X POST -H "Transfer-Encoding: chunked" --data-binary @- "$CLICKHOUSE_URL&query_id=$ID&max_insert_block_size=100000&input_format_parallel_parsing=0&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + fi + } + export -f wrap + while true; do + export ID="$TEST_MARK$RANDOM" + bash -c wrap 2>&1| grep -Fav "Killed" + done +} + +function thread_select +{ + while true; do + $CLICKHOUSE_CLIENT -q "with (select count() from dedup_test) as c select throwIf(c != 5000000, 'Expected 5000000 rows, got ' || toString(c)) format Null" + sleep 0.$RANDOM; + done +} + +function thread_cancel +{ + while true; do + SIGNAL="INT" + if (( RANDOM % 2 )); then + SIGNAL="KILL" + fi + PID=$(ps -ef | grep "$TEST_MARK" | grep -v grep | awk '{print $2}') + if [ ! 
-z "$PID" ]; then kill -s "$SIGNAL" "$PID" || echo "$PID"; fi + sleep 0.$RANDOM; + sleep 0.$RANDOM; + sleep 0.$RANDOM; + done +} + +export -f thread_insert; +export -f thread_select; +export -f thread_cancel; + +TIMEOUT=30 + +timeout $TIMEOUT bash -c thread_insert & +timeout $TIMEOUT bash -c thread_select & +timeout $TIMEOUT bash -c thread_cancel & + +wait + +$CLICKHOUSE_CLIENT -q 'select count() from dedup_test' From a3d5a76b37252e93f929f4737753f5fb9a156cc0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 22 Feb 2023 08:07:16 +0100 Subject: [PATCH 069/333] Do not run 02572_system_logs_materialized_views_ignore_errors for Replicated CI: https://s3.amazonaws.com/clickhouse-test-reports/46658/3a58951f6446b3aeca13cdc671322f459c4f77a7/stateless_tests__release__databasereplicated__[1/4].html Signed-off-by: Azat Khuzhin --- .../02572_system_logs_materialized_views_ignore_errors.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql index b7362ac1c33..9568bc7af1a 100644 --- a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql @@ -1,5 +1,6 @@ --- Tags: no-parallel +-- Tags: no-parallel, no-replicated-database -- Tag no-parallel: due to attaching to system.query_log +-- Tag no-replicated-database: Replicated database will has extra queries -- Attach MV to system.query_log and check that writing query_log will not fail From d0b50adffa98b6db67586098430d8f08f970e174 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 22 Feb 2023 11:00:14 +0100 Subject: [PATCH 070/333] Remove materialized_views_ignore_errors from FinalizingViewsTransform Signed-off-by: Azat Khuzhin --- .../Transforms/buildPushingToViewsChain.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 13c4806590e..dce2c61457c 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -174,7 +174,7 @@ class FinalizingViewsTransform final : public IProcessor static InputPorts initPorts(std::vector headers); public: - FinalizingViewsTransform(std::vector headers, ViewsDataPtr data, bool materialized_views_ignore_errors_); + FinalizingViewsTransform(std::vector headers, ViewsDataPtr data); String getName() const override { return "FinalizingViewsTransform"; } Status prepare() override; @@ -185,7 +185,6 @@ private: ViewsDataPtr views_data; std::vector statuses; std::exception_ptr any_exception; - bool materialized_views_ignore_errors; }; @@ -409,7 +408,7 @@ Chain buildPushingToViewsChain( headers.push_back(chain.getOutputHeader()); auto copying_data = std::make_shared(storage_header, views_data); - auto finalizing_views = std::make_shared(std::move(headers), views_data, settings.materialized_views_ignore_errors); + auto finalizing_views = std::make_shared(std::move(headers), views_data); auto out = copying_data->getOutputs().begin(); auto in = finalizing_views->getInputs().begin(); @@ -686,11 +685,10 @@ void PushingToWindowViewSink::consume(Chunk chunk) } -FinalizingViewsTransform::FinalizingViewsTransform(std::vector headers, ViewsDataPtr data, bool materialized_views_ignore_errors_) 
+FinalizingViewsTransform::FinalizingViewsTransform(std::vector headers, ViewsDataPtr data) : IProcessor(initPorts(std::move(headers)), {Block()}) , output(outputs.front()) , views_data(std::move(data)) - , materialized_views_ignore_errors(materialized_views_ignore_errors_) { statuses.resize(views_data->views.size()); } @@ -712,6 +710,7 @@ IProcessor::Status FinalizingViewsTransform::prepare() if (!output.canPush()) return Status::PortFull; + bool materialized_views_ignore_errors = views_data->context->getSettingsRef().materialized_views_ignore_errors; size_t num_finished = 0; size_t pos = 0; for (auto & input : inputs) @@ -785,6 +784,8 @@ static std::exception_ptr addStorageToException(std::exception_ptr ptr, const St void FinalizingViewsTransform::work() { + bool materialized_views_ignore_errors = views_data->context->getSettingsRef().materialized_views_ignore_errors; + size_t i = 0; for (auto & view : views_data->views) { From e660c0838ca7ddf597f0a7d54b52e305b482a474 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 22 Feb 2023 17:54:35 +0100 Subject: [PATCH 071/333] fix multipart requests --- src/Common/StatusFile.cpp | 2 +- src/Core/ExternalTable.cpp | 6 ++-- src/Core/MySQL/IMySQLReadPacket.cpp | 4 +-- src/IO/LimitReadBuffer.cpp | 19 +++++++----- src/IO/LimitReadBuffer.h | 9 +++--- src/IO/examples/limit_read_buffer.cpp | 4 +-- src/IO/examples/limit_read_buffer2.cpp | 6 ++-- src/Interpreters/executeQuery.cpp | 2 +- src/Server/HTTP/HTMLForm.cpp | 16 ++++++++-- src/Server/HTTP/HTMLForm.h | 3 ++ src/Server/HTTP/HTTPServerRequest.cpp | 11 +++++-- src/Server/MySQLHandler.cpp | 2 +- src/Server/TCPHandler.cpp | 2 +- src/Storages/StorageLog.cpp | 2 +- .../02435_insert_cancellation_http.sh | 30 ++++++++++++------- 15 files changed, 76 insertions(+), 42 deletions(-) diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index e71056fcaa3..a9ffce7ddf8 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -51,7 +51,7 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_) std::string contents; { ReadBufferFromFile in(path, 1024); - LimitReadBuffer limit_in(in, 1024, /* trow_exception */ false, /* exact_limit */ false); + LimitReadBuffer limit_in(in, 1024, /* trow_exception */ false, /* exact_limit */ {}); readStringUntilEOF(contents, limit_in); } diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 217f8808185..36dcc677589 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -34,7 +34,7 @@ ExternalTableDataPtr BaseExternalTable::getData(ContextPtr context) { initReadBuffer(); initSampleBlock(); - auto input = context->getInputFormat(format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE); + auto input = context->getInputFormat(format, *read_buffer, sample_block, context->getSettingsRef().get("max_block_size").get()); auto data = std::make_unique(); data->pipe = std::make_unique(); @@ -135,7 +135,9 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, if (settings.http_max_multipart_form_data_size) read_buffer = std::make_unique( stream, settings.http_max_multipart_form_data_size, - true, "the maximum size of multipart/form-data. This limit can be tuned by 'http_max_multipart_form_data_size' setting"); + /* trow_exception */ true, /* exact_limit */ std::optional(), + "the maximum size of multipart/form-data. 
" + "This limit can be tuned by 'http_max_multipart_form_data_size' setting"); else read_buffer = wrapReadBufferReference(stream); diff --git a/src/Core/MySQL/IMySQLReadPacket.cpp b/src/Core/MySQL/IMySQLReadPacket.cpp index c99ae11688b..39b2e5bbfb5 100644 --- a/src/Core/MySQL/IMySQLReadPacket.cpp +++ b/src/Core/MySQL/IMySQLReadPacket.cpp @@ -33,13 +33,13 @@ void IMySQLReadPacket::readPayloadWithUnpacked(ReadBuffer & in) void LimitedReadPacket::readPayload(ReadBuffer &in, uint8_t &sequence_id) { - LimitReadBuffer limited(in, 10000, /* trow_exception */ true, /* exact_limit */ false, "too long MySQL packet."); + LimitReadBuffer limited(in, 10000, /* trow_exception */ true, /* exact_limit */ {}, "too long MySQL packet."); IMySQLReadPacket::readPayload(limited, sequence_id); } void LimitedReadPacket::readPayloadWithUnpacked(ReadBuffer & in) { - LimitReadBuffer limited(in, 10000, /* trow_exception */ true, /* exact_limit */ false, "too long MySQL packet."); + LimitReadBuffer limited(in, 10000, /* trow_exception */ true, /* exact_limit */ {}, "too long MySQL packet."); IMySQLReadPacket::readPayloadWithUnpacked(limited); } diff --git a/src/IO/LimitReadBuffer.cpp b/src/IO/LimitReadBuffer.cpp index 96e2c359b58..e14112f8d19 100644 --- a/src/IO/LimitReadBuffer.cpp +++ b/src/IO/LimitReadBuffer.cpp @@ -22,11 +22,11 @@ bool LimitReadBuffer::nextImpl() if (bytes >= limit) { - if (exact_limit && bytes == limit) + if (exact_limit && bytes == *exact_limit) return false; - if (exact_limit && bytes != limit) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected data, got {} bytes, expected {}", bytes, limit); + if (exact_limit && bytes != *exact_limit) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected data, got {} bytes, expected {}", bytes, *exact_limit); if (throw_exception) throw Exception(ErrorCodes::LIMIT_EXCEEDED, "Limit for LimitReadBuffer exceeded: {}", exception_message); @@ -36,8 +36,8 @@ bool LimitReadBuffer::nextImpl() if (!in->next()) { - if (exact_limit && bytes != limit) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected EOF, got {} of {} bytes", bytes, limit); + if (exact_limit && bytes != *exact_limit) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected EOF, got {} of {} bytes", bytes, *exact_limit); /// Clearing the buffer with existing data. set(in->position(), 0); return false; @@ -52,7 +52,8 @@ bool LimitReadBuffer::nextImpl() } -LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, bool exact_limit_, std::string exception_message_) +LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, + std::optional exact_limit_, std::string exception_message_) : ReadBuffer(in_ ? 
in_->position() : nullptr, 0) , in(in_) , owns_in(owns) @@ -71,13 +72,15 @@ LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, boo } -LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, bool exact_limit_, std::string exception_message_) +LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, + std::optional exact_limit_, std::string exception_message_) : LimitReadBuffer(&in_, false, limit_, throw_exception_, exact_limit_, exception_message_) { } -LimitReadBuffer::LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, bool exact_limit_, std::string exception_message_) +LimitReadBuffer::LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, + std::optional exact_limit_, std::string exception_message_) : LimitReadBuffer(in_.release(), true, limit_, throw_exception_, exact_limit_, exception_message_) { } diff --git a/src/IO/LimitReadBuffer.h b/src/IO/LimitReadBuffer.h index 0f99bf2a08d..15885c1d850 100644 --- a/src/IO/LimitReadBuffer.h +++ b/src/IO/LimitReadBuffer.h @@ -13,8 +13,9 @@ namespace DB class LimitReadBuffer : public ReadBuffer { public: - LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, bool exact_limit_, std::string exception_message_ = {}); - LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, bool exact_limit_, + LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, + std::optional exact_limit_, std::string exception_message_ = {}); + LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_ = {}); ~LimitReadBuffer() override; @@ -24,10 +25,10 @@ private: UInt64 limit; bool throw_exception; - bool exact_limit; + std::optional exact_limit; std::string exception_message; - LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, bool exact_limit_, std::string exception_message_); + LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, std::optional exact_limit_, std::string exception_message_); bool nextImpl() override; }; diff --git a/src/IO/examples/limit_read_buffer.cpp b/src/IO/examples/limit_read_buffer.cpp index 559c87353f0..9c57c175620 100644 --- a/src/IO/examples/limit_read_buffer.cpp +++ b/src/IO/examples/limit_read_buffer.cpp @@ -24,13 +24,13 @@ int main(int argc, char ** argv) writeCString("--- first ---\n", out); { - LimitReadBuffer limit_in(in, limit, false); + LimitReadBuffer limit_in(in, limit, /* trow_exception */ false, /* exact_limit */ {}); copyData(limit_in, out); } writeCString("\n--- second ---\n", out); { - LimitReadBuffer limit_in(in, limit, false); + LimitReadBuffer limit_in(in, limit, /* trow_exception */ false, /* exact_limit */ {}); copyData(limit_in, out); } diff --git a/src/IO/examples/limit_read_buffer2.cpp b/src/IO/examples/limit_read_buffer2.cpp index ac7c43d764c..a0369047d3a 100644 --- a/src/IO/examples/limit_read_buffer2.cpp +++ b/src/IO/examples/limit_read_buffer2.cpp @@ -27,7 +27,7 @@ try ReadBuffer in(src.data(), src.size(), 0); - LimitReadBuffer limit_in(in, 1, false); + LimitReadBuffer limit_in(in, 1, /* trow_exception */ false, /* exact_limit */ {}); { WriteBufferFromString out(dst); @@ -55,7 +55,7 @@ try char x; readChar(x, in); - LimitReadBuffer limit_in(in, 1, false); + LimitReadBuffer limit_in(in, 1, /* trow_exception */ false, /* exact_limit */ {}); copyData(limit_in, out); @@ -85,7 +85,7 @@ try ReadBuffer 
in(src.data(), src.size(), 0); { - LimitReadBuffer limit_in(in, 1, false); + LimitReadBuffer limit_in(in, 1, /* trow_exception */ false, /* exact_limit */ {}); char x; readChar(x, limit_in); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index aeaacf35051..7fcdf1ff0dc 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1237,7 +1237,7 @@ void executeQuery( /// If not - copy enough data into 'parse_buf'. WriteBufferFromVector> out(parse_buf); - LimitReadBuffer limit(istr, max_query_size + 1, /* trow_exception */ false, /* exact_limit */ false); + LimitReadBuffer limit(istr, max_query_size + 1, /* trow_exception */ false, /* exact_limit */ {}); copyData(limit, out); out.finalize(); diff --git a/src/Server/HTTP/HTMLForm.cpp b/src/Server/HTTP/HTMLForm.cpp index d9d897d20c4..1abf9e5b83e 100644 --- a/src/Server/HTTP/HTMLForm.cpp +++ b/src/Server/HTTP/HTMLForm.cpp @@ -20,6 +20,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int CANNOT_READ_ALL_DATA; +} + namespace { @@ -229,6 +234,11 @@ void HTMLForm::readMultipart(ReadBuffer & in_, PartHandler & handler) if (!in.skipToNextBoundary()) break; } + + /// It's important to check, because we could get "fake" EOF and incomplete request if a client suddenly died in the middle. + if (!in.isActualEOF()) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected EOF, " + "did not find the last boundary while parsing a multipart HTTP request"); } @@ -244,7 +254,8 @@ bool HTMLForm::MultipartReadBuffer::skipToNextBoundary() if (in.eof()) return false; - assert(boundary_hit); + chassert(boundary_hit); + chassert(!found_last_boundary); boundary_hit = false; @@ -255,7 +266,8 @@ bool HTMLForm::MultipartReadBuffer::skipToNextBoundary() { set(in.position(), 0); next(); /// We need to restrict our buffer to size of next available line. - return !startsWith(line, boundary + "--"); + found_last_boundary = startsWith(line, boundary + "--"); + return !found_last_boundary; } } diff --git a/src/Server/HTTP/HTMLForm.h b/src/Server/HTTP/HTMLForm.h index 16889b41d80..c75dafccaf0 100644 --- a/src/Server/HTTP/HTMLForm.h +++ b/src/Server/HTTP/HTMLForm.h @@ -108,10 +108,13 @@ public: /// Returns false if last boundary found. 
bool skipToNextBoundary(); + bool isActualEOF() const { return found_last_boundary; } + private: PeekableReadBuffer in; const std::string boundary; bool boundary_hit = true; + bool found_last_boundary = false; std::string readLine(bool append_crlf); diff --git a/src/Server/HTTP/HTTPServerRequest.cpp b/src/Server/HTTP/HTTPServerRequest.cpp index d2e19d594dc..c9ffa3a4c3b 100644 --- a/src/Server/HTTP/HTTPServerRequest.cpp +++ b/src/Server/HTTP/HTTPServerRequest.cpp @@ -56,12 +56,17 @@ HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse if (getChunkedTransferEncoding()) stream = std::make_unique(std::move(in), context->getMaxChunkSize()); else if (hasContentLength()) - stream = std::make_unique(std::move(in), getContentLength(), /* trow_exception */ true, /* exact_limit */ true); + { + size_t content_length = getContentLength(); + stream = std::make_unique(std::move(in), content_length, + /* trow_exception */ true, /* exact_limit */ content_length); + } else if (getMethod() != HTTPRequest::HTTP_GET && getMethod() != HTTPRequest::HTTP_HEAD && getMethod() != HTTPRequest::HTTP_DELETE) { stream = std::move(in); - LOG_WARNING(&Poco::Logger::get("HTTPServerRequest"), "Got an HTTP request with no content length, " - "it may be impossible to distinguish graceful EOF from abnormal connection loss"); + if (!startsWith(getContentType(), "multipart/form-data")) + LOG_WARNING(&Poco::Logger::get("HTTPServerRequest"), "Got an HTTP request with no content length " + "and no chunked/multipart encoding, it may be impossible to distinguish graceful EOF from abnormal connection loss"); } else /// We have to distinguish empty buffer and nullptr. diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 279896da9fa..4dcbca93e28 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -155,7 +155,7 @@ void MySQLHandler::run() payload.readStrict(command); // For commands which are executed without MemoryTracker. - LimitReadBuffer limited_payload(payload, 10000, /* trow_exception */ true, /* exact_limit */ false, "too long MySQL packet."); + LimitReadBuffer limited_payload(payload, 10000, /* trow_exception */ true, /* exact_limit */ {}, "too long MySQL packet."); LOG_DEBUG(log, "Received command: {}. Connection id: {}.", static_cast(static_cast(command)), connection_id); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 3e89d1810c7..bf5578f2610 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1057,7 +1057,7 @@ bool TCPHandler::receiveProxyHeader() /// Only PROXYv1 is supported. /// Validation of protocol is not fully performed. - LimitReadBuffer limit_in(*in, 107, /* trow_exception */ true, /* exact_limit */ false); /// Maximum length from the specs. + LimitReadBuffer limit_in(*in, 107, /* trow_exception */ true, /* exact_limit */ {}); /// Maximum length from the specs. 
assertString("PROXY ", limit_in); diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 388faf1aeb5..014a0ff2424 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -118,7 +118,7 @@ private: if (limited_by_file_size) { - limited.emplace(*plain, file_size - offset, /* trow_exception */ false, /* exact_limit */ false); + limited.emplace(*plain, file_size - offset, /* trow_exception */ false, /* exact_limit */ std::optional()); compressed.emplace(*limited); } else diff --git a/tests/queries/0_stateless/02435_insert_cancellation_http.sh b/tests/queries/0_stateless/02435_insert_cancellation_http.sh index 653ec59f491..e3a1645db63 100755 --- a/tests/queries/0_stateless/02435_insert_cancellation_http.sh +++ b/tests/queries/0_stateless/02435_insert_cancellation_http.sh @@ -10,24 +10,32 @@ export TEST_MARK="02435_insert_${CLICKHOUSE_DATABASE}_" $CLICKHOUSE_CLIENT -q 'select * from numbers(5000000) format TSV' > $DATA_FILE $CLICKHOUSE_CLIENT -q 'create table dedup_test(A Int64) Engine = MergeTree order by A settings non_replicated_deduplication_window=1000;' -$CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&max_insert_block_size=100000&input_format_parallel_parsing=0&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + +function insert_data +{ + SETTINGS="query_id=$ID&max_insert_block_size=100000&input_format_parallel_parsing=0" + TRASH_SETTINGS="query_id=$ID&input_format_parallel_parsing=0&max_threads=1&max_insert_threads=1&max_insert_block_size=1100000&max_block_size=1100000&min_insert_block_size_bytes=0&min_insert_block_size_rows=1100000&max_insert_block_size=1100000" + TYPE=$(( RANDOM % 3 )) + if [[ "$TYPE" -eq 0 ]]; then + $CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + elif [[ "$TYPE" -eq 1 ]]; then + $CLICKHOUSE_CURL -sS -X POST -H "Transfer-Encoding: chunked" --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + else + $CLICKHOUSE_CURL -sS -F 'file=@-' "$CLICKHOUSE_URL&$TRASH_SETTINGS&file_format=TSV&file_types=UInt64" -X POST --form-string 'query=insert into dedup_test select * from file' < $DATA_FILE + fi +} + +export -f insert_data + +insert_data $CLICKHOUSE_CLIENT -q 'select count() from dedup_test' function thread_insert { # supress "Killed" messages from bash - function wrap - { - if (( RANDOM % 2 )); then - $CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&query_id=$ID&max_insert_block_size=100000&input_format_parallel_parsing=0&query=insert+into+dedup_test+format+TSV" < $DATA_FILE - else - $CLICKHOUSE_CURL -sS -X POST -H "Transfer-Encoding: chunked" --data-binary @- "$CLICKHOUSE_URL&query_id=$ID&max_insert_block_size=100000&input_format_parallel_parsing=0&query=insert+into+dedup_test+format+TSV" < $DATA_FILE - fi - } - export -f wrap while true; do export ID="$TEST_MARK$RANDOM" - bash -c wrap 2>&1| grep -Fav "Killed" + bash -c insert_data 2>&1| grep -Fav "Killed" done } From f49a67752ea252a189f073bb329396cdbea58b2f Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 23 Feb 2023 13:28:36 +0000 Subject: [PATCH 072/333] Support constant on LHS --- ...ass.cpp => LogicalExpressionOptimizer.cpp} | 53 ++++++++++--------- ...ass.h => LogicalExpressionOptimizerPass.h} | 6 +-- src/Analyzer/QueryTreePassManager.cpp | 4 +- .../00736_disjunction_optimisation.reference | 37 +++++++++++++ .../00736_disjunction_optimisation.sql | 4 ++ ...ssions_optimizer_low_cardinality.reference | 22 
++++++++ ..._expressions_optimizer_low_cardinality.sql | 2 + 7 files changed, 98 insertions(+), 30 deletions(-) rename src/Analyzer/Passes/{OrEqualityChainToInPass.cpp => LogicalExpressionOptimizer.cpp} (63%) rename src/Analyzer/Passes/{OrEqualityChainToInPass.h => LogicalExpressionOptimizerPass.h} (81%) diff --git a/src/Analyzer/Passes/OrEqualityChainToInPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp similarity index 63% rename from src/Analyzer/Passes/OrEqualityChainToInPass.cpp rename to src/Analyzer/Passes/LogicalExpressionOptimizer.cpp index 212e8252ce4..f9744cdc8d0 100644 --- a/src/Analyzer/Passes/OrEqualityChainToInPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp @@ -1,4 +1,4 @@ -#include +#include #include @@ -10,12 +10,12 @@ namespace DB { -class OrEqualityChainToInVisitor : public InDepthQueryTreeVisitorWithContext +class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithContext { public: - using Base = InDepthQueryTreeVisitorWithContext; + using Base = InDepthQueryTreeVisitorWithContext; - explicit OrEqualityChainToInVisitor(ContextPtr context) + explicit LogicalExpressionOptimizerVisitor(ContextPtr context) : Base(std::move(context)) {} @@ -34,38 +34,32 @@ public: for (const auto & argument : function_node->getArguments()) { auto * argument_function = argument->as(); - if (!argument_function) + if (!argument_function || argument_function->getFunctionName() != "equals") { or_operands.push_back(argument); continue; } /// collect all equality checks (x = value) - if (argument_function->getFunctionName() != "equals") - { - or_operands.push_back(argument); - continue; - } const auto & equals_arguments = argument_function->getArguments().getNodes(); const auto & lhs = equals_arguments[0]; + const auto & rhs = equals_arguments[1]; - const auto * rhs_literal = equals_arguments[1]->as(); - if (!rhs_literal) - { + if (lhs->as()) + node_to_equals_functions[rhs].push_back(argument); + else if (rhs->as()) + node_to_equals_functions[lhs].push_back(argument); + else or_operands.push_back(argument); - continue; - } - - node_to_equals_functions[lhs].push_back(argument); } auto in_function_resolver = FunctionFactory::instance().get("in", getContext()); - for (auto & [lhs, equals_functions] : node_to_equals_functions) + for (auto & [expression, equals_functions] : node_to_equals_functions) { const auto & settings = getSettings(); - if (equals_functions.size() < settings.optimize_min_equality_disjunction_chain_length && !lhs.node->getResultType()->lowCardinality()) + if (equals_functions.size() < settings.optimize_min_equality_disjunction_chain_length && !expression.node->getResultType()->lowCardinality()) { std::move(equals_functions.begin(), equals_functions.end(), std::back_inserter(or_operands)); continue; @@ -78,9 +72,18 @@ public: { const auto * equals_function = equals->as(); assert(equals_function && equals_function->getFunctionName() == "equals"); - const auto * rhs_literal = equals_function->getArguments().getNodes()[1]->as(); - assert(rhs_literal); - args.push_back(rhs_literal->getValue()); + + const auto & equals_arguments = equals_function->getArguments().getNodes(); + if (const auto * rhs_literal = equals_arguments[1]->as()) + { + args.push_back(rhs_literal->getValue()); + } + else + { + const auto * lhs_literal = equals_arguments[0]->as(); + assert(lhs_literal); + args.push_back(lhs_literal->getValue()); + } } auto rhs_node = std::make_shared(std::move(args)); @@ -89,7 +92,7 @@ public: QueryTreeNodes in_arguments; 
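Stripped of the query tree types, the change this commit makes is: an equality qualifies for the IN rewrite if either side is a constant, and the chain is grouped by the non-constant side. A self-contained sketch with toy structs standing in for the real FunctionNode/ConstantNode classes:

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

/// Toy stand-in for the query tree: an expression is either a column or an integer literal.
struct Expr { std::string column; bool is_literal = false; int literal = 0; };
static Expr col(std::string name) { return {std::move(name), false, 0}; }
static Expr lit(int value) { return {"", true, value}; }

/// The "constant on LHS" rule in isolation: for every equality, take whichever side is
/// the constant and group it under the other side, so 21 = s and s = 21 end up together.
static std::map<std::string, std::vector<int>> groupEqualityChains(const std::vector<std::pair<Expr, Expr>> & equalities)
{
    std::map<std::string, std::vector<int>> column_to_values;
    for (const auto & [lhs, rhs] : equalities)
    {
        if (lhs.is_literal && !rhs.is_literal)
            column_to_values[rhs.column].push_back(lhs.literal);   /// 21 = s
        else if (rhs.is_literal && !lhs.is_literal)
            column_to_values[lhs.column].push_back(rhs.literal);   /// s = 21
        /// anything else is left for the generic OR handling
    }
    return column_to_values;
}

int main()
{
    /// s = 21 OR 22 = s OR 23 = s  ->  s IN (21, 22, 23)
    auto chains = groupEqualityChains({{col("s"), lit(21)}, {lit(22), col("s")}, {lit(23), col("s")}});
    for (const auto & [column, values] : chains)
    {
        std::cout << column << " IN (";
        for (size_t i = 0; i < values.size(); ++i)
            std::cout << values[i] << (i + 1 < values.size() ? ", " : ")\n");
    }
}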
in_arguments.reserve(2); - in_arguments.push_back(lhs.node); + in_arguments.push_back(expression.node); in_arguments.push_back(std::move(rhs_node)); in_function->getArguments().getNodes() = std::move(in_arguments); @@ -108,9 +111,9 @@ public: }; -void OrEqualityChainToInPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +void LogicalExpressionOptimizerPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - OrEqualityChainToInVisitor visitor(std::move(context)); + LogicalExpressionOptimizerVisitor visitor(std::move(context)); visitor.visit(query_tree_node); } diff --git a/src/Analyzer/Passes/OrEqualityChainToInPass.h b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h similarity index 81% rename from src/Analyzer/Passes/OrEqualityChainToInPass.h rename to src/Analyzer/Passes/LogicalExpressionOptimizerPass.h index 2a9d6818a4f..327ae3c3fee 100644 --- a/src/Analyzer/Passes/OrEqualityChainToInPass.h +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h @@ -8,7 +8,7 @@ namespace DB /** * This pass replaces chains of equality functions inside an OR with a single IN operator. * The replacement is done if: - * - rhs of the equality function is a literal + * - rhs of the equality function is a constant * - length of chain is at least 'optimize_min_equality_disjunction_chain_length' long OR lhs is LowCardinality * * E.g. (optimize_min_equality_disjunction_chain_length = 2) @@ -25,10 +25,10 @@ namespace DB * ------------------------------- */ -class OrEqualityChainToInPass final : public IQueryTreePass +class LogicalExpressionOptimizerPass final : public IQueryTreePass { public: - String getName() override { return "OrEqualityChainToIn"; } + String getName() override { return "LogicalExpressionOptimizer"; } String getDescription() override { return "Transform all the 'or's with equality check to a single IN function"; } diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index dbbedfd1aaa..da419f0e5bd 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -39,7 +39,7 @@ #include #include #include -#include +#include namespace DB @@ -264,7 +264,7 @@ void addQueryTreePasses(QueryTreePassManager & manager) manager.addPass(std::make_unique()); - manager.addPass(std::make_unique()); + manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/tests/queries/0_stateless/00736_disjunction_optimisation.reference b/tests/queries/0_stateless/00736_disjunction_optimisation.reference index 4210571842f..fa395dccdc8 100644 --- a/tests/queries/0_stateless/00736_disjunction_optimisation.reference +++ b/tests/queries/0_stateless/00736_disjunction_optimisation.reference @@ -442,3 +442,40 @@ QUERY id: 0 JOIN TREE TABLE id: 3, table_name: default.bug SETTINGS allow_experimental_analyzer=1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +21 1 +22 1 +23 1 +QUERY id: 0 + PROJECTION COLUMNS + s UInt64 + or(equals(s, 21), equals(22, s), equals(23, s)) UInt8 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: 
Tuple(UInt8, UInt8, UInt8) + CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + JOIN TREE + TABLE id: 3, table_name: default.bug + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/00736_disjunction_optimisation.sql b/tests/queries/0_stateless/00736_disjunction_optimisation.sql index e52ed2ab45b..e5bfc81f7ae 100644 --- a/tests/queries/0_stateless/00736_disjunction_optimisation.sql +++ b/tests/queries/0_stateless/00736_disjunction_optimisation.sql @@ -42,4 +42,8 @@ select s, (s=21 or s=22 or s=23) from bug; select s, (s=21 or s=22 or s=23) from bug SETTINGS allow_experimental_analyzer = 1;; explain query tree select s, (s=21 or s=22 or s=23) from bug SETTINGS allow_experimental_analyzer = 1;; +select s, (s=21 or 22=s or 23=s) from bug; +select s, (s=21 or 22=s or 23=s) from bug SETTINGS allow_experimental_analyzer = 1;; +explain query tree select s, (s=21 or 22=s or 23=s) from bug SETTINGS allow_experimental_analyzer = 1;; + DROP TABLE bug; diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference index 086c0a49b59..617bdde183e 100644 --- a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference @@ -22,6 +22,28 @@ QUERY id: 0 SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_logical_expressions_optimizer_low_cardinality +WHERE (a = \'x\') OR (\'y\' = a) +QUERY id: 0 + PROJECTION COLUMNS + a LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality + WHERE + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) + CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT a +FROM t_logical_expressions_optimizer_low_cardinality WHERE (b = 0) OR (b = 1) QUERY id: 0 PROJECTION COLUMNS diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql index f5368679f60..14f8ad830e7 100644 --- a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql @@ -5,6 +5,8 @@ CREATE TABLE t_logical_expressions_optimizer_low_cardinality (a LowCardinality(S -- LowCardinality case, ignore optimize_min_equality_disjunction_chain_length limit, optimzer applied EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR a = 'y'; EXPLAIN QUERY TREE SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR a = 'y' SETTINGS allow_experimental_analyzer = 1; +EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR 'y' = a; +EXPLAIN QUERY TREE SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' 
OR 'y' = a SETTINGS allow_experimental_analyzer = 1; -- Non-LowCardinality case, optimizer not applied for short chains EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE b = 0 OR b = 1; EXPLAIN QUERY TREE SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE b = 0 OR b = 1 SETTINGS allow_experimental_analyzer = 1; From 88f322ace267dc1c194f85db5be1e94ba29c05c8 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 23 Feb 2023 14:50:58 +0100 Subject: [PATCH 073/333] Try fix test --- tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh index e84c06027e8..d27a2f9fcbb 100755 --- a/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh +++ b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh @@ -6,8 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh echo "CapnProto" -${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format CapnProto settings format_schema='format_schemas/02566_ipv4_ipv6:Message'" > 02566_ipv4_ipv6_data.capnp -${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.capnp, auto, 'ipv6 IPv6, ipv4 IPv4') settings format_schema='format_schemas/02566_ipv4_ipv6:Message'" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format CapnProto settings format_schema='$CURDIR/format_schemas/02566_ipv4_ipv6:Message'" > 02566_ipv4_ipv6_data.capnp +${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.capnp, auto, 'ipv6 IPv6, ipv4 IPv4') settings format_schema='$CURDIR/format_schemas/02566_ipv4_ipv6:Message'" rm 02566_ipv4_ipv6_data.capnp echo "Avro" From e37a3801ca4478bb88d21cc9d8eda029ba4dbc8a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 23 Feb 2023 16:04:51 +0000 Subject: [PATCH 074/333] Add new logical optimizations --- .../Passes/LogicalExpressionOptimizer.cpp | 148 ++++++++++++++++-- ...00621_regression_for_in_operator.reference | 4 +- .../00736_disjunction_optimisation.reference | 64 ++++---- ...ssions_optimizer_low_cardinality.reference | 8 +- ...imizer_removing_redundant_checks.reference | 0 ...al_optimizer_removing_redundant_checks.sql | 26 +++ 6 files changed, 202 insertions(+), 48 deletions(-) create mode 100644 tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference create mode 100644 tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.sql diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp index f9744cdc8d0..c29d38c7865 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp @@ -7,6 +7,8 @@ #include #include +#include + namespace DB { @@ -17,21 +19,115 @@ public: explicit LogicalExpressionOptimizerVisitor(ContextPtr context) : Base(std::move(context)) + , cast_function_resolver(FunctionFactory::instance().get("_CAST", getContext())) {} - void visitImpl(QueryTreeNodePtr & node) { auto * function_node = node->as(); - if (!function_node || function_node->getFunctionName() != "or") + if (!function_node) return; + if (function_node->getFunctionName() == "or") + { + tryReplaceOrEqualsChainWithIn(node); + return; + } + 
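The "and" branch that follows dispatches to the second new rewrite: if a conjunction requires the same expression to equal two different constants, it can only be false, and duplicate checks against the same constant collapse to one. A standalone sketch of that bookkeeping (it deliberately ignores NULLs, which is why the real pass returns early when the result type is Nullable):

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <utility>
#include <vector>

/// Remember which constant each column is required to equal. A second, different constant
/// for the same column makes the whole AND chain false; a repeated identical check is dropped.
static std::optional<std::map<std::string, int>> simplifyAndChain(const std::vector<std::pair<std::string, int>> & equals_checks)
{
    std::map<std::string, int> column_to_constant;
    for (const auto & [column, constant] : equals_checks)
    {
        auto [it, inserted] = column_to_constant.emplace(column, constant);
        if (!inserted && it->second != constant)
            return std::nullopt;           /// a = 1 AND a = 2  ->  always false
    }
    return column_to_constant;             /// a = 3 AND b = 7 AND a = 3  ->  a = 3 AND b = 7
}

int main()
{
    std::cout << simplifyAndChain({{"a", 1}, {"a", 2}}).has_value() << '\n';        /// 0: collapses to false
    std::cout << simplifyAndChain({{"a", 3}, {"b", 7}, {"a", 3}})->size() << '\n';  /// 2 remaining checks
}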
+ if (function_node->getFunctionName() == "and") + { + tryReplaceAndEqualsChainsWithConstant(node); + return; + } + } +private: + void tryReplaceAndEqualsChainsWithConstant(QueryTreeNodePtr & node) + { + auto & function_node = node->as(); + assert(function_node.getFunctionName() == "and"); + + if (function_node.getResultType()->isNullable()) + return; + + QueryTreeNodes and_operands; + + QueryTreeNodePtrWithHashMap node_to_constants; + + for (const auto & argument : function_node.getArguments()) + { + auto * argument_function = argument->as(); + if (!argument_function || argument_function->getFunctionName() != "equals") + { + and_operands.push_back(argument); + continue; + } + + const auto & equals_arguments = argument_function->getArguments().getNodes(); + const auto & lhs = equals_arguments[0]; + const auto & rhs = equals_arguments[1]; + + const auto has_and_with_different_constant = [&](const QueryTreeNodePtr & expression, const ConstantNode * constant) + { + if (auto it = node_to_constants.find(expression); it != node_to_constants.end()) + { + if (!it->second->isEqual(*constant)) + return true; + } + else + { + node_to_constants.emplace(expression, constant); + and_operands.push_back(argument); + } + + return false; + }; + + bool collapse_to_false = false; + + if (const auto * lhs_literal = lhs->as()) + { + collapse_to_false = has_and_with_different_constant(rhs, lhs_literal); + } + else if (const auto * rhs_literal = rhs->as()) + { + collapse_to_false = has_and_with_different_constant(lhs, rhs_literal); + } + else + continue; + + if (collapse_to_false) + { + auto false_value = std::make_shared(0u, function_node.getResultType()); + auto false_node = std::make_shared(std::move(false_value)); + node = std::move(false_node); + return; + } + } + + if (and_operands.size() == 1) + { + assert(!function_node.getResultType()->isNullable()); + resolveAsCast(function_node, std::move(and_operands[0])); + return; + } + + auto and_function_resolver = FunctionFactory::instance().get("and", getContext()); + function_node.getArguments().getNodes() = std::move(and_operands); + function_node.resolveAsFunction(and_function_resolver); + } + + void tryReplaceOrEqualsChainWithIn(QueryTreeNodePtr & node) + { + auto & function_node = node->as(); + assert(function_node.getFunctionName() == "or"); + QueryTreeNodes or_operands; QueryTreeNodePtrWithHashMap node_to_equals_functions; + QueryTreeNodePtrWithHashMap node_to_constants; - for (const auto & argument : function_node->getArguments()) + for (const auto & argument : function_node.getArguments()) { auto * argument_function = argument->as(); if (!argument_function || argument_function->getFunctionName() != "equals") @@ -46,10 +142,20 @@ public: const auto & lhs = equals_arguments[0]; const auto & rhs = equals_arguments[1]; - if (lhs->as()) - node_to_equals_functions[rhs].push_back(argument); - else if (rhs->as()) - node_to_equals_functions[lhs].push_back(argument); + const auto add_equals_function_if_not_present = [&](const auto & expression_node, const ConstantNode * constant) + { + auto & constant_set = node_to_constants[expression_node]; + if (!constant_set.contains(constant)) + { + constant_set.insert(constant); + node_to_equals_functions[expression_node].push_back(argument); + } + }; + + if (const auto * lhs_literal = lhs->as()) + add_equals_function_if_not_present(rhs, lhs_literal); + else if (const auto * rhs_literal = rhs->as()) + add_equals_function_if_not_present(lhs, rhs_literal); else or_operands.push_back(argument); } @@ -102,12 +208,34 @@ public: 
} if (or_operands.size() == 1) - or_operands.push_back(std::make_shared(static_cast(0))); + { + assert(!function_node.getResultType()->isNullable()); + resolveAsCast(function_node, std::move(or_operands[0])); + return; + } auto or_function_resolver = FunctionFactory::instance().get("or", getContext()); - function_node->getArguments().getNodes() = std::move(or_operands); - function_node->resolveAsFunction(or_function_resolver); + function_node.getArguments().getNodes() = std::move(or_operands); + function_node.resolveAsFunction(or_function_resolver); } + + void resolveAsCast(FunctionNode & function_node, QueryTreeNodePtr operand) + { + std::string cast_type = function_node.getResultType()->getName(); + auto cast_type_constant_value = std::make_shared(std::move(cast_type), std::make_shared()); + auto cast_type_constant_node = std::make_shared(std::move(cast_type_constant_value)); + + QueryTreeNodes arguments; + arguments.reserve(2); + arguments.push_back(std::move(operand)); + arguments.push_back(std::move(cast_type_constant_node)); + + function_node.getArguments().getNodes() = std::move(arguments); + + function_node.resolveAsFunction(cast_function_resolver); + } + + const FunctionOverloadResolverPtr cast_function_resolver; }; diff --git a/tests/queries/0_stateless/00621_regression_for_in_operator.reference b/tests/queries/0_stateless/00621_regression_for_in_operator.reference index c9eb7a08fc6..54b345b65c0 100644 --- a/tests/queries/0_stateless/00621_regression_for_in_operator.reference +++ b/tests/queries/0_stateless/00621_regression_for_in_operator.reference @@ -12,7 +12,7 @@ QUERY id: 0 JOIN TREE TABLE id: 3, table_name: default.regression_for_in_operator_view WHERE - FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 @@ -20,7 +20,7 @@ QUERY id: 0 LIST id: 7, nodes: 2 COLUMN id: 8, column_name: g, result_type: String, source_id: 3 CONSTANT id: 9, constant_value: Tuple_(\'5\', \'6\'), constant_value_type: Tuple(String, String) - CONSTANT id: 10, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 10, constant_value: \'UInt8\', constant_value_type: String SETTINGS allow_experimental_analyzer=1 2 2 diff --git a/tests/queries/0_stateless/00736_disjunction_optimisation.reference b/tests/queries/0_stateless/00736_disjunction_optimisation.reference index fa395dccdc8..4b899527913 100644 --- a/tests/queries/0_stateless/00736_disjunction_optimisation.reference +++ b/tests/queries/0_stateless/00736_disjunction_optimisation.reference @@ -39,7 +39,7 @@ QUERY id: 0 FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 6, nodes: 2 - FUNCTION id: 7, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 7, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 8, nodes: 2 FUNCTION id: 9, function_name: in, function_type: ordinary, result_type: UInt8 @@ -47,8 +47,8 @@ QUERY id: 0 LIST id: 10, nodes: 2 COLUMN id: 11, column_name: k, result_type: UInt64, source_id: 3 CONSTANT id: 12, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 13, constant_value: UInt64_0, constant_value_type: UInt8 - FUNCTION id: 14, function_name: or, function_type: ordinary, result_type: UInt8 + CONSTANT id: 13, constant_value: 
\'UInt8\', constant_value_type: String + FUNCTION id: 14, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 15, nodes: 2 FUNCTION id: 16, function_name: in, function_type: ordinary, result_type: UInt8 @@ -56,7 +56,7 @@ QUERY id: 0 LIST id: 17, nodes: 2 COLUMN id: 18, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 19, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 20, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 20, constant_value: \'UInt8\', constant_value_type: String SETTINGS allow_experimental_analyzer=1 1 21 1 22 @@ -96,7 +96,7 @@ QUERY id: 0 JOIN TREE TABLE id: 7, table_name: default.bug WHERE - FUNCTION id: 9, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 10, nodes: 2 FUNCTION id: 11, function_name: in, function_type: ordinary, result_type: UInt8 @@ -104,9 +104,9 @@ QUERY id: 0 LIST id: 12, nodes: 2 COLUMN id: 13, column_name: k, result_type: UInt64, source_id: 7 CONSTANT id: 14, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 15, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 15, constant_value: \'UInt8\', constant_value_type: String WHERE - FUNCTION id: 16, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 16, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 17, nodes: 2 FUNCTION id: 18, function_name: in, function_type: ordinary, result_type: UInt8 @@ -114,7 +114,7 @@ QUERY id: 0 LIST id: 19, nodes: 2 COLUMN id: 20, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 21, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 22, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 22, constant_value: \'UInt8\', constant_value_type: String SETTINGS allow_experimental_analyzer=1 1 1 21 1 1 1 1 1 22 0 1 1 @@ -145,7 +145,7 @@ QUERY id: 0 PROJECTION LIST id: 1, nodes: 6 COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 - FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 @@ -153,14 +153,14 @@ QUERY id: 0 LIST id: 7, nodes: 2 COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 CONSTANT id: 8, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 12, nodes: 2 COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 13, constant_value: UInt64_21, constant_value_type: UInt8 - FUNCTION id: 14, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 14, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 15, nodes: 2 FUNCTION id: 16, function_name: in, function_type: ordinary, result_type: UInt8 @@ -168,8 +168,8 @@ 
QUERY id: 0 LIST id: 17, nodes: 2 COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 18, constant_value: Tuple_(UInt64_21, UInt64_22), constant_value_type: Tuple(UInt8, UInt8) - CONSTANT id: 19, constant_value: UInt64_0, constant_value_type: UInt8 - FUNCTION id: 20, function_name: or, function_type: ordinary, result_type: UInt8 + CONSTANT id: 19, constant_value: \'UInt8\', constant_value_type: String + FUNCTION id: 20, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 21, nodes: 2 FUNCTION id: 22, function_name: in, function_type: ordinary, result_type: UInt8 @@ -177,7 +177,7 @@ QUERY id: 0 LIST id: 23, nodes: 2 COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 24, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 25, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 25, constant_value: \'UInt8\', constant_value_type: String JOIN TREE TABLE id: 3, table_name: default.bug SETTINGS allow_experimental_analyzer=1 @@ -206,7 +206,7 @@ QUERY id: 0 PROJECTION LIST id: 1, nodes: 2 COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 - FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 @@ -214,7 +214,7 @@ QUERY id: 0 LIST id: 7, nodes: 2 COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 8, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String JOIN TREE TABLE id: 3, table_name: default.bug SETTINGS allow_experimental_analyzer=1 @@ -259,7 +259,7 @@ QUERY id: 0 FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 6, nodes: 2 - FUNCTION id: 7, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 7, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 8, nodes: 2 FUNCTION id: 9, function_name: in, function_type: ordinary, result_type: UInt8 @@ -267,8 +267,8 @@ QUERY id: 0 LIST id: 10, nodes: 2 COLUMN id: 11, column_name: k, result_type: UInt64, source_id: 3 CONSTANT id: 12, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 13, constant_value: UInt64_0, constant_value_type: UInt8 - FUNCTION id: 14, function_name: or, function_type: ordinary, result_type: UInt8 + CONSTANT id: 13, constant_value: \'UInt8\', constant_value_type: String + FUNCTION id: 14, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 15, nodes: 2 FUNCTION id: 16, function_name: in, function_type: ordinary, result_type: UInt8 @@ -276,7 +276,7 @@ QUERY id: 0 LIST id: 17, nodes: 2 COLUMN id: 18, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 19, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 20, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 20, constant_value: \'UInt8\', constant_value_type: String SETTINGS allow_experimental_analyzer=1 1 21 1 22 @@ -316,7 +316,7 @@ QUERY id: 0 JOIN TREE TABLE id: 7, 
table_name: default.bug WHERE - FUNCTION id: 9, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 10, nodes: 2 FUNCTION id: 11, function_name: in, function_type: ordinary, result_type: UInt8 @@ -324,9 +324,9 @@ QUERY id: 0 LIST id: 12, nodes: 2 COLUMN id: 13, column_name: k, result_type: UInt64, source_id: 7 CONSTANT id: 14, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 15, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 15, constant_value: \'UInt8\', constant_value_type: String WHERE - FUNCTION id: 16, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 16, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 17, nodes: 2 FUNCTION id: 18, function_name: in, function_type: ordinary, result_type: UInt8 @@ -334,7 +334,7 @@ QUERY id: 0 LIST id: 19, nodes: 2 COLUMN id: 20, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 21, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 22, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 22, constant_value: \'UInt8\', constant_value_type: String SETTINGS allow_experimental_analyzer=1 1 1 21 1 1 1 1 1 22 0 1 1 @@ -365,7 +365,7 @@ QUERY id: 0 PROJECTION LIST id: 1, nodes: 6 COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 - FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 @@ -373,7 +373,7 @@ QUERY id: 0 LIST id: 7, nodes: 2 COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 CONSTANT id: 8, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8 ARGUMENTS @@ -393,7 +393,7 @@ QUERY id: 0 LIST id: 20, nodes: 2 COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 21, constant_value: UInt64_22, constant_value_type: UInt8 - FUNCTION id: 22, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 22, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 23, nodes: 2 FUNCTION id: 24, function_name: in, function_type: ordinary, result_type: UInt8 @@ -401,7 +401,7 @@ QUERY id: 0 LIST id: 25, nodes: 2 COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 26, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 27, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 27, constant_value: \'UInt8\', constant_value_type: String JOIN TREE TABLE id: 3, table_name: default.bug SETTINGS allow_experimental_analyzer=1 @@ -430,7 +430,7 @@ QUERY id: 0 PROJECTION LIST id: 1, nodes: 2 COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 - FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: 
_CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 @@ -438,7 +438,7 @@ QUERY id: 0 LIST id: 7, nodes: 2 COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 8, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String JOIN TREE TABLE id: 3, table_name: default.bug SETTINGS allow_experimental_analyzer=1 @@ -467,7 +467,7 @@ QUERY id: 0 PROJECTION LIST id: 1, nodes: 2 COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 - FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 @@ -475,7 +475,7 @@ QUERY id: 0 LIST id: 7, nodes: 2 COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 CONSTANT id: 8, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String JOIN TREE TABLE id: 3, table_name: default.bug SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference index 617bdde183e..22bd68e0ac1 100644 --- a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference @@ -10,7 +10,7 @@ QUERY id: 0 JOIN TREE TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality WHERE - FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 @@ -18,7 +18,7 @@ QUERY id: 0 LIST id: 7, nodes: 2 COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 CONSTANT id: 8, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) - CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_logical_expressions_optimizer_low_cardinality @@ -32,7 +32,7 @@ QUERY id: 0 JOIN TREE TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality WHERE - FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 @@ -40,7 +40,7 @@ QUERY id: 0 LIST id: 7, nodes: 2 COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 CONSTANT id: 8, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) - CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 9, constant_value: \'UInt8\', 
constant_value_type: String SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_logical_expressions_optimizer_low_cardinality diff --git a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.sql b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.sql new file mode 100644 index 00000000000..f20ef412215 --- /dev/null +++ b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.sql @@ -0,0 +1,26 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS 02668_logical_optimizer; + +CREATE TABLE 02668_logical_optimizer +(a Int32, b LowCardinality(String)) +ENGINE=Memory; + +INSERT INTO 02668_logical_optimizer VALUES (1, 'test'), (2, 'test2'), (3, 'another'); + +SET optimize_min_equality_disjunction_chain_length = 2; + +SELECT * FROM 02668_logical_optimizer WHERE a = 1 OR 3 = a OR 1 = a; +EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 1 OR 3 = a OR 1 = a; + +SELECT * FROM 02668_logical_optimizer WHERE a = 1 OR 1 = a; +EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 1 OR 1 = a; + +SELECT * FROM 02668_logical_optimizer WHERE a = 1 AND 2 = a; +EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 1 AND 2 = a; + +SELECT * FROM 02668_logical_optimizer WHERE 3 = a AND b = 'another' AND a = 3; +EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 3 AND b = 'another' AND a = 3; + +SELECT * FROM 02668_logical_optimizer WHERE a = 2 AND 2 = a; +EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 2 AND 2 = a; From d21a6a3ba40f2259aba17ff26acf930814a35f16 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 23 Feb 2023 20:42:46 +0100 Subject: [PATCH 075/333] fix --- .../PushingAsyncPipelineExecutor.cpp | 2 +- .../Executors/PushingPipelineExecutor.cpp | 2 +- ..._cancel_insert_when_client_dies.reference} | 1 + ...> 02434_cancel_insert_when_client_dies.sh} | 29 +++++++-- ...434_insert_cancellation_native_protocol.sh | 65 ------------------- .../02435_insert_cancellation_http.reference | 2 - utils/check-mysql-binlog/main.cpp | 3 +- 7 files changed, 27 insertions(+), 77 deletions(-) rename tests/queries/0_stateless/{02434_insert_cancellation_native_protocol.reference => 02434_cancel_insert_when_client_dies.reference} (88%) rename tests/queries/0_stateless/{02435_insert_cancellation_http.sh => 02434_cancel_insert_when_client_dies.sh} (59%) delete mode 100755 tests/queries/0_stateless/02434_insert_cancellation_native_protocol.sh delete mode 100644 tests/queries/0_stateless/02435_insert_cancellation_http.reference diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 20f47c6b54c..70815bb8b3b 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -140,7 +140,7 @@ PushingAsyncPipelineExecutor::PushingAsyncPipelineExecutor(QueryPipeline & pipel PushingAsyncPipelineExecutor::~PushingAsyncPipelineExecutor() { /// It must be finalized explicitly. Otherwise we cancel it assuming it's due to an exception. 
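The relaxed chassert below is about executors destroyed from inside a catch block: there std::uncaught_exceptions() is already back to 0, but std::current_exception() still reports the exception being handled, so cancelling instead of asserting is the right call. A tiny standalone demonstration of those two calls:

#include <exception>
#include <iostream>
#include <stdexcept>

/// A destructor running inside a catch block sees std::uncaught_exceptions() == 0
/// (the exception is already being handled), but std::current_exception() is non-null.
struct Guard
{
    ~Guard()
    {
        std::cout << "uncaught=" << std::uncaught_exceptions()
                  << " handling=" << (std::current_exception() != nullptr) << '\n';
    }
};

int main()
{
    try
    {
        throw std::runtime_error("query failed");
    }
    catch (...)
    {
        Guard guard;    /// prints "uncaught=0 handling=1": exactly the case the extra check covers
    }
}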
-    chassert(finished || std::uncaught_exceptions());
+    chassert(finished || std::uncaught_exceptions() || std::current_exception());
     try
     {
         cancel();
diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp
index ca60932ed6f..696932932df 100644
--- a/src/Processors/Executors/PushingPipelineExecutor.cpp
+++ b/src/Processors/Executors/PushingPipelineExecutor.cpp
@@ -64,7 +64,7 @@ PushingPipelineExecutor::PushingPipelineExecutor(QueryPipeline & pipeline_) : pi
 PushingPipelineExecutor::~PushingPipelineExecutor()
 {
     /// It must be finalized explicitly. Otherwise we cancel it assuming it's due to an exception.
-    chassert(finished || std::uncaught_exceptions());
+    chassert(finished || std::uncaught_exceptions() || std::current_exception());
     try
     {
         cancel();
diff --git a/tests/queries/0_stateless/02434_insert_cancellation_native_protocol.reference b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.reference
similarity index 88%
rename from tests/queries/0_stateless/02434_insert_cancellation_native_protocol.reference
rename to tests/queries/0_stateless/02434_cancel_insert_when_client_dies.reference
index e3038e03530..d2475419998 100644
--- a/tests/queries/0_stateless/02434_insert_cancellation_native_protocol.reference
+++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.reference
@@ -1,2 +1,3 @@
 5000000
 5000000
+1
diff --git a/tests/queries/0_stateless/02435_insert_cancellation_http.sh b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
similarity index 59%
rename from tests/queries/0_stateless/02435_insert_cancellation_http.sh
rename to tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
index 653ec59f491..c2cf98f712f 100755
--- a/tests/queries/0_stateless/02435_insert_cancellation_http.sh
+++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+# Tags: no-random-settings
 # shellcheck disable=SC2009
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 .
"$CURDIR"/../shell_config.sh export DATA_FILE="$CLICKHOUSE_TMP/deduptest.tsv" -export TEST_MARK="02435_insert_${CLICKHOUSE_DATABASE}_" +export TEST_MARK="02434_insert_${CLICKHOUSE_DATABASE}_" $CLICKHOUSE_CLIENT -q 'select * from numbers(5000000) format TSV' > $DATA_FILE $CLICKHOUSE_CLIENT -q 'create table dedup_test(A Int64) Engine = MergeTree order by A settings non_replicated_deduplication_window=1000;' function insert_data { - SETTINGS="query_id=$ID&max_insert_block_size=100000&input_format_parallel_parsing=0" - TRASH_SETTINGS="query_id=$ID&input_format_parallel_parsing=0&max_threads=1&max_insert_threads=1&max_insert_block_size=1100000&max_block_size=1100000&min_insert_block_size_bytes=0&min_insert_block_size_rows=1100000&max_insert_block_size=1100000" - TYPE=$(( RANDOM % 3 )) + SETTINGS="query_id=$ID&max_insert_block_size=110000&min_insert_block_size_rows=110000" + # max_block_size=10000, so external table will contain smaller blocks that will be squashed on insert-select (more chances to catch a bug on query cancellation) + TRASH_SETTINGS="query_id=$ID&input_format_parallel_parsing=0&max_threads=1&max_insert_threads=1&max_insert_block_size=110000&max_block_size=10000&min_insert_block_size_bytes=0&min_insert_block_size_rows=110000&max_insert_block_size=110000" + TYPE=$(( RANDOM % 4 )) + if [[ "$TYPE" -eq 0 ]]; then - $CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + # client will send 10000-rows blocks, server will squash them into 110000-rows blocks (more chances to catch a bug on query cancellation) + $CLICKHOUSE_CLIENT --max_block_size=10000 --max_insert_block_size=10000 --query_id="$ID" -q 'insert into dedup_test settings max_insert_block_size=110000, min_insert_block_size_rows=110000 format TSV' < $DATA_FILE elif [[ "$TYPE" -eq 1 ]]; then + $CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + elif [[ "$TYPE" -eq 2 ]]; then $CLICKHOUSE_CURL -sS -X POST -H "Transfer-Encoding: chunked" --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE else $CLICKHOUSE_CURL -sS -F 'file=@-' "$CLICKHOUSE_URL&$TRASH_SETTINGS&file_format=TSV&file_types=UInt64" -X POST --form-string 'query=insert into dedup_test select * from file' < $DATA_FILE @@ -27,6 +33,7 @@ function insert_data export -f insert_data +ID="02434_insert_init_${CLICKHOUSE_DATABASE}_$RANDOM" insert_data $CLICKHOUSE_CLIENT -q 'select count() from dedup_test' @@ -66,12 +73,20 @@ export -f thread_insert; export -f thread_select; export -f thread_cancel; -TIMEOUT=30 +TIMEOUT=40 # 10 seconds for each TYPE timeout $TIMEOUT bash -c thread_insert & timeout $TIMEOUT bash -c thread_select & -timeout $TIMEOUT bash -c thread_cancel & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & wait $CLICKHOUSE_CLIENT -q 'select count() from dedup_test' + +$CLICKHOUSE_CLIENT -q 'system flush logs' + +# We have to ignore stderr from thread_cancel, because our CI finds a bug in ps... 
+# So use this query to check that thread_cancel do something +$CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and ( + message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes') or + message like '%Connection reset by peer%')" diff --git a/tests/queries/0_stateless/02434_insert_cancellation_native_protocol.sh b/tests/queries/0_stateless/02434_insert_cancellation_native_protocol.sh deleted file mode 100755 index 4eb02c38c1a..00000000000 --- a/tests/queries/0_stateless/02434_insert_cancellation_native_protocol.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash -# shellcheck disable=SC2009 - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -export DATA_FILE="$CLICKHOUSE_TMP/deduptest.tsv" -export TEST_MARK="02434_insert_${CLICKHOUSE_DATABASE}_" - -$CLICKHOUSE_CLIENT -q 'select * from numbers(5000000) format TSV' > $DATA_FILE -$CLICKHOUSE_CLIENT -q 'create table dedup_test(A Int64) Engine = MergeTree order by A settings non_replicated_deduplication_window=1000;' -$CLICKHOUSE_CLIENT --max_block_size=100000 --min_chunk_bytes_for_parallel_parsing=10000 -q 'insert into dedup_test format TSV' < $DATA_FILE -$CLICKHOUSE_CLIENT -q 'select count() from dedup_test' - -function thread_insert -{ - # supress "Killed" messages from bash - function wrap - { - $CLICKHOUSE_CLIENT --max_block_size=100000 --min_chunk_bytes_for_parallel_parsing=10000 --query_id="$ID" -q 'insert into dedup_test format TSV' < $DATA_FILE - } - export -f wrap - while true; do - export ID="$TEST_MARK$RANDOM" - bash -c wrap 2>&1| grep -Fav "Killed" - done -} - -function thread_select -{ - while true; do - $CLICKHOUSE_CLIENT -q "with (select count() from dedup_test) as c select throwIf(c != 5000000, 'Expected 5000000 rows, got ' || toString(c)) format Null" - sleep 0.$RANDOM; - done -} - -function thread_cancel -{ - while true; do - SIGNAL="INT" - if (( RANDOM % 2 )); then - SIGNAL="KILL" - fi - PID=$(ps -ef | grep "$TEST_MARK" | grep -v grep | awk '{print $2}') - if [ ! 
-z "$PID" ]; then kill -s "$SIGNAL" "$PID" || echo "$PID"; fi - sleep 0.$RANDOM; - sleep 0.$RANDOM; - sleep 0.$RANDOM; - done -} - -export -f thread_insert; -export -f thread_select; -export -f thread_cancel; - -TIMEOUT=30 - -timeout $TIMEOUT bash -c thread_insert & -timeout $TIMEOUT bash -c thread_select & -timeout $TIMEOUT bash -c thread_cancel & - -wait - -$CLICKHOUSE_CLIENT -q 'select count() from dedup_test' diff --git a/tests/queries/0_stateless/02435_insert_cancellation_http.reference b/tests/queries/0_stateless/02435_insert_cancellation_http.reference deleted file mode 100644 index e3038e03530..00000000000 --- a/tests/queries/0_stateless/02435_insert_cancellation_http.reference +++ /dev/null @@ -1,2 +0,0 @@ -5000000 -5000000 diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp index 7dd387ba5be..cf2a27e8aac 100644 --- a/utils/check-mysql-binlog/main.cpp +++ b/utils/check-mysql-binlog/main.cpp @@ -17,7 +17,8 @@ static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody( std::shared_ptr & last_table_map_event, bool exist_checksum) { DB::MySQLReplication::BinlogEventPtr event; - DB::ReadBufferPtr limit_read_buffer = std::make_shared(payload, header.event_size - 19, false); + DB::ReadBufferPtr limit_read_buffer = std::make_shared(payload, header.event_size - 19, + /* trow_exception */ false, /* exact_limit */ {}); DB::ReadBufferPtr event_payload = std::make_shared(*limit_read_buffer, exist_checksum ? 4 : 0); switch (header.type) From 240e0070e5b4ea55b22a687fcad375131cbdad70 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 23 Feb 2023 23:07:35 +0100 Subject: [PATCH 076/333] fix --- src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index ac8101d4ca2..78b173de6dc 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -210,7 +210,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) /// to avoid buffering of huge amount of data in memory. 
auto read_buf = getReadBufferFromASTInsertQuery(query); - LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, false); + LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* trow_exception */ false, /* exact_limit */ {}); WriteBufferFromString write_buf(bytes); copyData(limit_buf, write_buf); From 5e5e802348af021c6dbeefd5a05e2ad98c1a21bf Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 23 Feb 2023 22:54:50 +0000 Subject: [PATCH 077/333] return back optimization for ALTER MODIFY Nullable --- src/Storages/MergeTree/MutateTask.cpp | 45 +++++++++++++++++-- .../02669_alter_modify_to_nullable.reference | 8 ++++ .../02669_alter_modify_to_nullable.sql | 31 +++++++++++++ 3 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02669_alter_modify_to_nullable.reference create mode 100644 tests/queries/0_stateless/02669_alter_modify_to_nullable.sql diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 8d95a76b5da..de623c58538 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -1568,6 +1569,45 @@ bool MutateTask::execute() return false; } +static bool canSkipConversionToNullable(const MergeTreeDataPartPtr & part, const MutationCommand & command) +{ + if (command.type != MutationCommand::READ_COLUMN) + return false; + + auto part_column = part->tryGetColumn(command.column_name); + if (!part_column) + return false; + + /// For ALTER MODIFY COLUMN from 'Type' to 'Nullable(Type)' we can skip mutatation and + /// apply only metadata conversion. But it doesn't work for custom serialization. + const auto * to_nullable = typeid_cast(command.data_type.get()); + if (!to_nullable) + return false; + + if (!part_column->type->equals(*to_nullable->getNestedType())) + return false; + + auto serialization = part->getSerialization(command.column_name); + if (serialization->getKind() != ISerialization::Kind::DEFAULT) + return false; + + return true; +} + +static bool canSkipMutationCommandForPart(const MergeTreeDataPartPtr & part, const MutationCommand & command, const ContextPtr & context) +{ + if (command.partition) + { + auto command_partition_id = part->storage.getPartitionIDFromQuery(command.partition, context); + if (part->info.partition_id != command_partition_id) + return true; + } + + if (canSkipConversionToNullable(part, command)) + return true; + + return false; +} bool MutateTask::prepare() { @@ -1586,11 +1626,8 @@ bool MutateTask::prepare() context_for_reading->setSetting("force_primary_key", false); for (const auto & command : *ctx->commands) - { - if (command.partition == nullptr || ctx->source_part->info.partition_id == ctx->data->getPartitionIDFromQuery( - command.partition, context_for_reading)) + if (!canSkipMutationCommandForPart(ctx->source_part, command, context_for_reading)) ctx->commands_for_part.emplace_back(command); - } if (ctx->source_part->isStoredOnDisk() && !isStorageTouchedByMutations( *ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, context_for_reading)) diff --git a/tests/queries/0_stateless/02669_alter_modify_to_nullable.reference b/tests/queries/0_stateless/02669_alter_modify_to_nullable.reference new file mode 100644 index 00000000000..aff80e1d699 --- /dev/null +++ b/tests/queries/0_stateless/02669_alter_modify_to_nullable.reference @@ -0,0 +1,8 @@ +1_1_1_0 String Default +2_2_2_0 String Sparse +20000 10435 
['','bar','foo'] +1_1_1_0_3 String Default +2_2_2_0_3 Nullable(String) Default +20000 10435 ['','bar','foo'] +1_1_1_0_3 0 +2_2_2_0_3 10000 diff --git a/tests/queries/0_stateless/02669_alter_modify_to_nullable.sql b/tests/queries/0_stateless/02669_alter_modify_to_nullable.sql new file mode 100644 index 00000000000..862280fd7cd --- /dev/null +++ b/tests/queries/0_stateless/02669_alter_modify_to_nullable.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS t_modify_to_nullable; + +CREATE TABLE t_modify_to_nullable (key UInt64, id UInt64, s String) +ENGINE = MergeTree ORDER BY id PARTITION BY key +SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_modify_to_nullable SELECT 1, number, 'foo' FROM numbers(10000); +INSERT INTO t_modify_to_nullable SELECT 2, number, if (number % 23 = 0, 'bar', '') FROM numbers(10000); + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_modify_to_nullable' AND column = 's' AND active +ORDER BY name; + +SELECT count(s), countIf(s != ''), arraySort(groupUniqArray(s)) FROM t_modify_to_nullable; + +SET mutations_sync = 2; +ALTER TABLE t_modify_to_nullable MODIFY COLUMN s Nullable(String); + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_modify_to_nullable' AND column = 's' AND active +ORDER BY name; + +SELECT count(s), countIf(s != ''), arraySort(groupUniqArray(s)) FROM t_modify_to_nullable; + +SYSTEM FLUSH LOGS; + +SELECT part_name, read_rows FROM system.part_log +WHERE database = currentDatabase() AND table = 't_modify_to_nullable' AND event_type = 'MutatePart' +ORDER BY part_name; + +DROP TABLE t_modify_to_nullable; From 4de9c9152940e334bb7a971b1bf6fc6bacb6499c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 24 Feb 2023 01:48:06 +0100 Subject: [PATCH 078/333] add a test for transactions --- .../02434_cancel_insert_when_client_dies.sh | 2 +- ...02435_rollback_cancelled_queries.reference | 3 + .../02435_rollback_cancelled_queries.sh | 118 ++++++++++++++++++ 3 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02435_rollback_cancelled_queries.reference create mode 100755 tests/queries/0_stateless/02435_rollback_cancelled_queries.sh diff --git a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh index c2cf98f712f..6cae90a3cc3 100755 --- a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh +++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh @@ -62,7 +62,7 @@ function thread_cancel SIGNAL="KILL" fi PID=$(ps -ef | grep "$TEST_MARK" | grep -v grep | awk '{print $2}') - if [ ! -z "$PID" ]; then kill -s "$SIGNAL" "$PID" || echo "$PID"; fi + if [ ! 
-z "$PID" ]; then kill -s "$SIGNAL" "$PID"; fi sleep 0.$RANDOM; sleep 0.$RANDOM; sleep 0.$RANDOM; diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference b/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference new file mode 100644 index 00000000000..94c627bb5d8 --- /dev/null +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference @@ -0,0 +1,3 @@ +1000000 +0 1 +1 diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh new file mode 100755 index 00000000000..bc16c54cd1e --- /dev/null +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-ordinary-database +# shellcheck disable=SC2009 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +export DATA_FILE="$CLICKHOUSE_TMP/deduptest.tsv" +export TEST_MARK="02435_insert_${CLICKHOUSE_DATABASE}_" +export SESSION="02435_session_${CLICKHOUSE_DATABASE}" + +$CLICKHOUSE_CLIENT -q 'select * from numbers(1000000) format TSV' > $DATA_FILE +$CLICKHOUSE_CLIENT -q 'create table dedup_test(A Int64) Engine = MergeTree order by sin(A) partition by intDiv(A, 100000)' + +function insert_data +{ + IMPLICIT=$(( RANDOM % 2 )) + SESSION_ID="${SESSION}_$RANDOM$RANDOM$RANDOM" + TXN_SETTINGS="session_id=$SESSION_ID&throw_on_unsupported_query_inside_transaction=0" + BEGIN="" + COMMIT="" + SETTINGS="query_id=$ID&$TXN_SETTINGS&max_insert_block_size=110000&min_insert_block_size_rows=110000" + if [[ "$IMPLICIT" -eq 0 ]]; then + $CLICKHOUSE_CURL -sS -d 'begin transaction' "$CLICKHOUSE_URL&$TXN_SETTINGS" + BEGIN="begin transaction;" + COMMIT=$(echo -ne "\n\ncommit") + else + TXN_SETTINGS="$TXN_SETTINGS&implicit_transaction=1" + fi + + SETTINGS="query_id=$ID&$TXN_SETTINGS&max_insert_block_size=110000&min_insert_block_size_rows=110000" + # max_block_size=10000, so external table will contain smaller blocks that will be squashed on insert-select (more chances to catch a bug on query cancellation) + TRASH_SETTINGS="query_id=$ID&$TXN_SETTINGS&input_format_parallel_parsing=0&max_threads=1&max_insert_threads=1&max_insert_block_size=110000&max_block_size=10000&min_insert_block_size_bytes=0&min_insert_block_size_rows=110000&max_insert_block_size=110000" + TYPE=$(( RANDOM % 6 )) + + if [[ "$TYPE" -eq 0 ]]; then + $CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + elif [[ "$TYPE" -eq 1 ]]; then + $CLICKHOUSE_CURL -sS -X POST -H "Transfer-Encoding: chunked" --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + elif [[ "$TYPE" -eq 2 ]]; then + $CLICKHOUSE_CURL -sS -F 'file=@-' "$CLICKHOUSE_URL&$TRASH_SETTINGS&file_format=TSV&file_types=UInt64" -X POST --form-string 'query=insert into dedup_test select * from file' < $DATA_FILE + else + # client will send 1000-rows blocks, server will squash them into 110000-rows blocks (more chances to catch a bug on query cancellation) + $CLICKHOUSE_CLIENT --query_id="$ID" --throw_on_unsupported_query_inside_transaction=0 --implicit_transaction="$IMPLICIT" \ + --max_block_size=1000 --max_insert_block_size=1000 --multiquery -q \ + "${BEGIN}insert into dedup_test settings max_insert_block_size=110000, min_insert_block_size_rows=110000 format TSV$COMMIT" < $DATA_FILE \ + | grep -Fv "Transaction is not in RUNNING state" + fi + + if [[ 
"$IMPLICIT" -eq 0 ]]; then + $CLICKHOUSE_CURL -sS -d 'commit' "$CLICKHOUSE_URL&$TXN_SETTINGS" | grep -Faq "Transaction is not in RUNNING state" && $CLICKHOUSE_CURL -sS -d 'rollback' "$CLICKHOUSE_URL&$TXN_SETTINGS" + fi +} + +export -f insert_data + +ID="02435_insert_init_${CLICKHOUSE_DATABASE}_$RANDOM" +insert_data +$CLICKHOUSE_CLIENT -q 'select count() from dedup_test' + +function thread_insert +{ + # supress "Killed" messages from bash + while true; do + export ID="$TEST_MARK$RANDOM" + bash -c insert_data 2>&1| grep -Fav "Killed" + done +} + +function thread_select +{ + while true; do + $CLICKHOUSE_CLIENT --implicit_transaction=1 -q "with (select count() from dedup_test) as c select throwIf(c % 1000000 != 0, 'Expected 1000000 * N rows, got ' || toString(c)) format Null" + sleep 0.$RANDOM; + done +} + +function thread_cancel +{ + while true; do + SIGNAL="INT" + if (( RANDOM % 2 )); then + SIGNAL="KILL" + fi + PID=$(ps -ef | grep "$TEST_MARK" | grep -v grep | awk '{print $2}') + if [ ! -z "$PID" ]; then kill -s "$SIGNAL" "$PID"; fi + sleep 0.$RANDOM; + done +} + +export -f thread_insert; +export -f thread_select; +export -f thread_cancel; + +TIMEOUT=20 # 5 seconds for each TYPE + +timeout $TIMEOUT bash -c thread_insert & +timeout $TIMEOUT bash -c thread_select & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +wait +wait_for_queries_to_finish + +$CLICKHOUSE_CLIENT -q 'system flush logs' + +ID="02435_insert_last_${CLICKHOUSE_DATABASE}_$RANDOM" +insert_data + +$CLICKHOUSE_CLIENT --implicit_transaction=1 -q 'select count() % 1000000, count() > 0 from dedup_test' + +# We have to ignore stderr from thread_cancel, because our CI finds a bug in ps... +# So use this query to check that thread_cancel do something +$CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and ( + message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes') or + message like '%Connection reset by peer%')" + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=0 -q "drop table dedup_test" From 6f4c742408f77202c0740829d0d10d2d756ea9e9 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 24 Feb 2023 02:42:45 +0100 Subject: [PATCH 079/333] fix --- .../0_stateless/02435_rollback_cancelled_queries.reference | 2 +- .../queries/0_stateless/02435_rollback_cancelled_queries.sh | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference b/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference index 94c627bb5d8..2d32c17ec7c 100644 --- a/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference @@ -1,3 +1,3 @@ 1000000 -0 1 +0 1 diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh index bc16c54cd1e..18317960cdf 100755 --- a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh @@ -42,7 +42,7 @@ function insert_data $CLICKHOUSE_CURL -sS -F 'file=@-' "$CLICKHOUSE_URL&$TRASH_SETTINGS&file_format=TSV&file_types=UInt64" -X POST --form-string 'query=insert into dedup_test select * from file' < $DATA_FILE else # client will send 1000-rows blocks, server will squash them into 110000-rows blocks (more chances to catch a 
bug on query cancellation) - $CLICKHOUSE_CLIENT --query_id="$ID" --throw_on_unsupported_query_inside_transaction=0 --implicit_transaction="$IMPLICIT" \ + $CLICKHOUSE_CLIENT --stacktrace --query_id="$ID" --throw_on_unsupported_query_inside_transaction=0 --implicit_transaction="$IMPLICIT" \ --max_block_size=1000 --max_insert_block_size=1000 --multiquery -q \ "${BEGIN}insert into dedup_test settings max_insert_block_size=110000, min_insert_block_size_rows=110000 format TSV$COMMIT" < $DATA_FILE \ | grep -Fv "Transaction is not in RUNNING state" @@ -107,7 +107,8 @@ $CLICKHOUSE_CLIENT -q 'system flush logs' ID="02435_insert_last_${CLICKHOUSE_DATABASE}_$RANDOM" insert_data -$CLICKHOUSE_CLIENT --implicit_transaction=1 -q 'select count() % 1000000, count() > 0 from dedup_test' +$CLICKHOUSE_CLIENT --implicit_transaction=1 -q 'select throwIf(count() % 1000000 != 0 or count() = 0) from dedup_test' \ + || $CLICKHOUSE_CLIENT -q "select name, rows, active, visible, creation_tid, creation_csn from system.parts where database=currentDatabase();" # We have to ignore stderr from thread_cancel, because our CI finds a bug in ps... # So use this query to check that thread_cancel do something From 7122ebab4d696b45add7f3d40b066e31a4093b18 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 24 Feb 2023 03:06:33 +0100 Subject: [PATCH 080/333] fix clickhouse-test --- tests/clickhouse-test | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index f5d1099c3f4..cfd2546bbdd 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -73,13 +73,7 @@ def stringhash(s): # First and last lines of the log def trim_for_log(s): - if not s: - return s - lines = s.splitlines() - if len(lines) > 100: - return "\n".join(lines[:50] + ["#" * 100] + lines[-50:]) - else: - return "\n".join(lines) + return s class HTTPError(Exception): From 8227a6e7bfbdaf0d9eb9f71be909dccf3013a3cc Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 24 Feb 2023 09:12:08 +0000 Subject: [PATCH 081/333] Fix tests --- .../Passes/LogicalExpressionOptimizer.cpp | 30 +-- .../Passes/LogicalExpressionOptimizerPass.h | 50 ++++- ...00621_regression_for_in_operator.reference | 10 +- .../00736_disjunction_optimisation.reference | 210 ++++++------------ ...ssions_optimizer_low_cardinality.reference | 20 +- ...imizer_removing_redundant_checks.reference | 89 ++++++++ 6 files changed, 221 insertions(+), 188 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp index c29d38c7865..85b44de74da 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp @@ -19,7 +19,6 @@ public: explicit LogicalExpressionOptimizerVisitor(ContextPtr context) : Base(std::move(context)) - , cast_function_resolver(FunctionFactory::instance().get("_CAST", getContext())) {} void visitImpl(QueryTreeNodePtr & node) @@ -86,15 +85,11 @@ private: bool collapse_to_false = false; if (const auto * lhs_literal = lhs->as()) - { collapse_to_false = has_and_with_different_constant(rhs, lhs_literal); - } else if (const auto * rhs_literal = rhs->as()) - { collapse_to_false = has_and_with_different_constant(lhs, rhs_literal); - } else - continue; + and_operands.push_back(argument); if (collapse_to_false) { @@ -108,7 +103,7 @@ private: if (and_operands.size() == 1) { assert(!function_node.getResultType()->isNullable()); - resolveAsCast(function_node, std::move(and_operands[0])); + node = 
std::move(and_operands[0]); return; } @@ -210,7 +205,7 @@ private: if (or_operands.size() == 1) { assert(!function_node.getResultType()->isNullable()); - resolveAsCast(function_node, std::move(or_operands[0])); + node = std::move(or_operands[0]); return; } @@ -218,27 +213,8 @@ private: function_node.getArguments().getNodes() = std::move(or_operands); function_node.resolveAsFunction(or_function_resolver); } - - void resolveAsCast(FunctionNode & function_node, QueryTreeNodePtr operand) - { - std::string cast_type = function_node.getResultType()->getName(); - auto cast_type_constant_value = std::make_shared(std::move(cast_type), std::make_shared()); - auto cast_type_constant_node = std::make_shared(std::move(cast_type_constant_value)); - - QueryTreeNodes arguments; - arguments.reserve(2); - arguments.push_back(std::move(operand)); - arguments.push_back(std::move(cast_type_constant_node)); - - function_node.getArguments().getNodes() = std::move(arguments); - - function_node.resolveAsFunction(cast_function_resolver); - } - - const FunctionOverloadResolverPtr cast_function_resolver; }; - void LogicalExpressionOptimizerPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { LogicalExpressionOptimizerVisitor visitor(std::move(context)); diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h index 327ae3c3fee..b436c94fe4c 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h @@ -6,10 +6,12 @@ namespace DB { /** - * This pass replaces chains of equality functions inside an OR with a single IN operator. + * This pass tries to do optimizations on logical expression + * + * Replaces chains of equality functions inside an OR with a single IN operator. * The replacement is done if: - * - rhs of the equality function is a constant - * - length of chain is at least 'optimize_min_equality_disjunction_chain_length' long OR lhs is LowCardinality + * - one of the operands of the equality function is a constant + * - length of chain is at least 'optimize_min_equality_disjunction_chain_length' long OR the expression has type of LowCardinality * * E.g. (optimize_min_equality_disjunction_chain_length = 2) * ------------------------------- @@ -23,6 +25,48 @@ namespace DB * FROM TABLE * WHERE b = 'test' OR a IN (1, 2); * ------------------------------- + * + * Removes duplicate OR checks + * ------------------------------- + * SELECT * + * FROM table + * WHERE a = 1 OR b = 'test' OR a = 1; + * + * will be transformed into + * + * SELECT * + * FROM TABLE + * WHERE a = 1 OR b = 'test'; + * ------------------------------- + * + * Replaces AND chains with a single constant. 
+ * The replacement is done if: + * - one of the operands of the equality function is a constant + * - constants are different for same expression + * ------------------------------- + * SELECT * + * FROM table + * WHERE a = 1 AND b = 'test' AND a = 2; + * + * will be transformed into + * + * SELECT * + * FROM TABLE + * WHERE 0; + * ------------------------------- + * + * Removes duplicate AND checks + * ------------------------------- + * SELECT * + * FROM table + * WHERE a = 1 AND b = 'test' AND a = 1; + * + * will be transformed into + * + * SELECT * + * FROM TABLE + * WHERE a = 1 AND b = 'test'; + * ------------------------------- */ class LogicalExpressionOptimizerPass final : public IQueryTreePass diff --git a/tests/queries/0_stateless/00621_regression_for_in_operator.reference b/tests/queries/0_stateless/00621_regression_for_in_operator.reference index 54b345b65c0..ab8bcf307eb 100644 --- a/tests/queries/0_stateless/00621_regression_for_in_operator.reference +++ b/tests/queries/0_stateless/00621_regression_for_in_operator.reference @@ -12,15 +12,11 @@ QUERY id: 0 JOIN TREE TABLE id: 3, table_name: default.regression_for_in_operator_view WHERE - FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 7, nodes: 2 - COLUMN id: 8, column_name: g, result_type: String, source_id: 3 - CONSTANT id: 9, constant_value: Tuple_(\'5\', \'6\'), constant_value_type: Tuple(String, String) - CONSTANT id: 10, constant_value: \'UInt8\', constant_value_type: String + COLUMN id: 6, column_name: g, result_type: String, source_id: 3 + CONSTANT id: 7, constant_value: Tuple_(\'5\', \'6\'), constant_value_type: Tuple(String, String) SETTINGS allow_experimental_analyzer=1 2 2 diff --git a/tests/queries/0_stateless/00736_disjunction_optimisation.reference b/tests/queries/0_stateless/00736_disjunction_optimisation.reference index 4b899527913..84477a64057 100644 --- a/tests/queries/0_stateless/00736_disjunction_optimisation.reference +++ b/tests/queries/0_stateless/00736_disjunction_optimisation.reference @@ -39,24 +39,16 @@ QUERY id: 0 FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 6, nodes: 2 - FUNCTION id: 7, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 7, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 8, nodes: 2 - FUNCTION id: 9, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 10, nodes: 2 - COLUMN id: 11, column_name: k, result_type: UInt64, source_id: 3 - CONSTANT id: 12, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 13, constant_value: \'UInt8\', constant_value_type: String - FUNCTION id: 14, function_name: _CAST, function_type: ordinary, result_type: UInt8 + COLUMN id: 9, column_name: k, result_type: UInt64, source_id: 3 + CONSTANT id: 10, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) + FUNCTION id: 11, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 15, nodes: 2 - FUNCTION id: 16, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 17, nodes: 2 - COLUMN id: 18, column_name: s, result_type: UInt64, source_id: 3 - 
CONSTANT id: 19, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 20, constant_value: \'UInt8\', constant_value_type: String + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 14, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) SETTINGS allow_experimental_analyzer=1 1 21 1 22 @@ -96,25 +88,17 @@ QUERY id: 0 JOIN TREE TABLE id: 7, table_name: default.bug WHERE - FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 9, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 10, nodes: 2 - FUNCTION id: 11, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 12, nodes: 2 - COLUMN id: 13, column_name: k, result_type: UInt64, source_id: 7 - CONSTANT id: 14, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 15, constant_value: \'UInt8\', constant_value_type: String + COLUMN id: 11, column_name: k, result_type: UInt64, source_id: 7 + CONSTANT id: 12, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) WHERE - FUNCTION id: 16, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 13, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 21, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 22, constant_value: \'UInt8\', constant_value_type: String + LIST id: 14, nodes: 2 + COLUMN id: 15, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 16, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) SETTINGS allow_experimental_analyzer=1 1 1 21 1 1 1 1 1 22 0 1 1 @@ -145,39 +129,27 @@ QUERY id: 0 PROJECTION LIST id: 1, nodes: 6 COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 - FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 7, nodes: 2 - COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 - CONSTANT id: 8, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String - COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 - FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8 + COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 + CONSTANT id: 6, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) + COLUMN id: 7, column_name: s, result_type: UInt64, source_id: 3 + FUNCTION id: 8, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 7, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 10, constant_value: UInt64_21, constant_value_type: UInt8 + 
FUNCTION id: 11, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 12, nodes: 2 - COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 13, constant_value: UInt64_21, constant_value_type: UInt8 - FUNCTION id: 14, function_name: _CAST, function_type: ordinary, result_type: UInt8 + COLUMN id: 7, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 13, constant_value: Tuple_(UInt64_21, UInt64_22), constant_value_type: Tuple(UInt8, UInt8) + FUNCTION id: 14, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 15, nodes: 2 - FUNCTION id: 16, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 17, nodes: 2 - COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 18, constant_value: Tuple_(UInt64_21, UInt64_22), constant_value_type: Tuple(UInt8, UInt8) - CONSTANT id: 19, constant_value: \'UInt8\', constant_value_type: String - FUNCTION id: 20, function_name: _CAST, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 21, nodes: 2 - FUNCTION id: 22, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 23, nodes: 2 - COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 24, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 25, constant_value: \'UInt8\', constant_value_type: String + COLUMN id: 7, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 16, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) JOIN TREE TABLE id: 3, table_name: default.bug SETTINGS allow_experimental_analyzer=1 @@ -206,15 +178,11 @@ QUERY id: 0 PROJECTION LIST id: 1, nodes: 2 COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 - FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 7, nodes: 2 - COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 8, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String + COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 6, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) JOIN TREE TABLE id: 3, table_name: default.bug SETTINGS allow_experimental_analyzer=1 @@ -259,24 +227,16 @@ QUERY id: 0 FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 6, nodes: 2 - FUNCTION id: 7, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 7, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 8, nodes: 2 - FUNCTION id: 9, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 10, nodes: 2 - COLUMN id: 11, column_name: k, result_type: UInt64, source_id: 3 - CONSTANT id: 12, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 13, constant_value: \'UInt8\', constant_value_type: String - FUNCTION id: 14, function_name: _CAST, function_type: ordinary, result_type: UInt8 + 
COLUMN id: 9, column_name: k, result_type: UInt64, source_id: 3 + CONSTANT id: 10, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) + FUNCTION id: 11, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 15, nodes: 2 - FUNCTION id: 16, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 17, nodes: 2 - COLUMN id: 18, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 19, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 20, constant_value: \'UInt8\', constant_value_type: String + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 14, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) SETTINGS allow_experimental_analyzer=1 1 21 1 22 @@ -316,25 +276,17 @@ QUERY id: 0 JOIN TREE TABLE id: 7, table_name: default.bug WHERE - FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 9, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 10, nodes: 2 - FUNCTION id: 11, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 12, nodes: 2 - COLUMN id: 13, column_name: k, result_type: UInt64, source_id: 7 - CONSTANT id: 14, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 15, constant_value: \'UInt8\', constant_value_type: String + COLUMN id: 11, column_name: k, result_type: UInt64, source_id: 7 + CONSTANT id: 12, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) WHERE - FUNCTION id: 16, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 13, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 17, nodes: 2 - FUNCTION id: 18, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 21, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 22, constant_value: \'UInt8\', constant_value_type: String + LIST id: 14, nodes: 2 + COLUMN id: 15, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 16, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) SETTINGS allow_experimental_analyzer=1 1 1 21 1 1 1 1 1 22 0 1 1 @@ -365,43 +317,35 @@ QUERY id: 0 PROJECTION LIST id: 1, nodes: 6 COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 - FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 7, nodes: 2 - COLUMN id: 2, column_name: k, result_type: UInt64, source_id: 3 - CONSTANT id: 8, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String - COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 - FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8 + COLUMN id: 2, column_name: 
k, result_type: UInt64, source_id: 3 + CONSTANT id: 6, constant_value: Tuple_(UInt64_1, UInt64_2, UInt64_3), constant_value_type: Tuple(UInt8, UInt8, UInt8) + COLUMN id: 7, column_name: s, result_type: UInt64, source_id: 3 + FUNCTION id: 8, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 7, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 10, constant_value: UInt64_21, constant_value_type: UInt8 + FUNCTION id: 11, function_name: or, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 12, nodes: 2 - COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 13, constant_value: UInt64_21, constant_value_type: UInt8 - FUNCTION id: 14, function_name: or, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 15, nodes: 2 + FUNCTION id: 13, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + COLUMN id: 7, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_21, constant_value_type: UInt8 FUNCTION id: 16, function_name: equals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 17, nodes: 2 - COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 18, constant_value: UInt64_21, constant_value_type: UInt8 - FUNCTION id: 19, function_name: equals, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 20, nodes: 2 - COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 21, constant_value: UInt64_22, constant_value_type: UInt8 - FUNCTION id: 22, function_name: _CAST, function_type: ordinary, result_type: UInt8 + COLUMN id: 7, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 18, constant_value: UInt64_22, constant_value_type: UInt8 + FUNCTION id: 19, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 23, nodes: 2 - FUNCTION id: 24, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 25, nodes: 2 - COLUMN id: 10, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 26, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 27, constant_value: \'UInt8\', constant_value_type: String + LIST id: 20, nodes: 2 + COLUMN id: 7, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 21, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) JOIN TREE TABLE id: 3, table_name: default.bug SETTINGS allow_experimental_analyzer=1 @@ -430,15 +374,11 @@ QUERY id: 0 PROJECTION LIST id: 1, nodes: 2 COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 - FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 7, nodes: 2 - COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 8, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String + COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 6, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) JOIN 
TREE TABLE id: 3, table_name: default.bug SETTINGS allow_experimental_analyzer=1 @@ -467,15 +407,11 @@ QUERY id: 0 PROJECTION LIST id: 1, nodes: 2 COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 - FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 7, nodes: 2 - COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 - CONSTANT id: 8, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) - CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String + COLUMN id: 2, column_name: s, result_type: UInt64, source_id: 3 + CONSTANT id: 6, constant_value: Tuple_(UInt64_21, UInt64_22, UInt64_23), constant_value_type: Tuple(UInt8, UInt8, UInt8) JOIN TREE TABLE id: 3, table_name: default.bug SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference index 22bd68e0ac1..84589668d64 100644 --- a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference @@ -10,15 +10,11 @@ QUERY id: 0 JOIN TREE TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality WHERE - FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 7, nodes: 2 - COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 - CONSTANT id: 8, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) - CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 6, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_logical_expressions_optimizer_low_cardinality @@ -32,15 +28,11 @@ QUERY id: 0 JOIN TREE TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality WHERE - FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 7, nodes: 2 - COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 - CONSTANT id: 8, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) - CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 6, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_logical_expressions_optimizer_low_cardinality diff --git a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference 
b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference index e69de29bb2d..d083e178586 100644 --- a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference +++ b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference @@ -0,0 +1,89 @@ +1 test +3 another +QUERY id: 0 + PROJECTION COLUMNS + a Int32 + b LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3 + COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.02668_logical_optimizer + WHERE + FUNCTION id: 5, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: a, result_type: Int32, source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(UInt64_1, UInt64_3), constant_value_type: Tuple(UInt8, UInt8) +1 test +QUERY id: 0 + PROJECTION COLUMNS + a Int32 + b LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3 + COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.02668_logical_optimizer + WHERE + FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: a, result_type: Int32, source_id: 3 + CONSTANT id: 8, constant_value: UInt64_1, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + a Int32 + b LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3 + COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.02668_logical_optimizer + WHERE + CONSTANT id: 5, constant_value: UInt64_0, constant_value_type: UInt8 +3 another +QUERY id: 0 + PROJECTION COLUMNS + a Int32 + b LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3 + COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.02668_logical_optimizer + WHERE + FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + FUNCTION id: 7, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 9, column_name: a, result_type: Int32, source_id: 3 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: LowCardinality(UInt8) + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: b, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 14, constant_value: \'another\', constant_value_type: String +2 test2 +QUERY id: 0 + PROJECTION COLUMNS + a Int32 + b LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3 + COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.02668_logical_optimizer + WHERE + FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: a, result_type: Int32, source_id: 3 + CONSTANT id: 8, constant_value: UInt64_2, 
constant_value_type: UInt8 From 12b38ece1d6924e0e7718eb57be41c2e83931cea Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 24 Feb 2023 13:07:09 +0000 Subject: [PATCH 082/333] Fix build --- src/Processors/Formats/Impl/AvroRowInputFormat.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 1427e6098ed..cb851c4a1e9 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -433,6 +433,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { decoder.decodeFixed(tmp_fixed.size(), tmp_fixed); column.insertData(reinterpret_cast(tmp_fixed.data()), tmp_fixed.size()); + return true; }; } break; From 5024f11adc2557b1fbf775b312b3d290bfea97fc Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 24 Feb 2023 17:18:45 +0000 Subject: [PATCH 083/333] fix order in serialization info --- src/DataTypes/Serializations/SerializationInfo.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationInfo.h b/src/DataTypes/Serializations/SerializationInfo.h index 4ae5cf8c193..7f73d053f1b 100644 --- a/src/DataTypes/Serializations/SerializationInfo.h +++ b/src/DataTypes/Serializations/SerializationInfo.h @@ -86,7 +86,8 @@ using MutableSerializationInfoPtr = std::shared_ptr; using SerializationInfos = std::vector; using MutableSerializationInfos = std::vector; -class SerializationInfoByName : public std::unordered_map +/// The order is important because info is serialized to part metadata. +class SerializationInfoByName : public std::map { public: SerializationInfoByName() = default; From 3a947ecddc5a9f30e91ef74e4f54da97b8c91f9e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 24 Feb 2023 19:17:44 +0000 Subject: [PATCH 084/333] do not allow const and non-deterministic secondary indexes --- src/Storages/IndicesDescription.cpp | 10 +------ src/Storages/MergeTree/MergeTreeData.cpp | 1 - src/Storages/MergeTree/MergeTreeIndices.cpp | 26 +++++++++++++++++++ .../02670_constant_skip_index.reference | 0 .../0_stateless/02670_constant_skip_index.sql | 25 ++++++++++++++++++ 5 files changed, 52 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/02670_constant_skip_index.reference create mode 100644 tests/queries/0_stateless/02670_constant_skip_index.sql diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 2e07aceeaa9..00a3636b605 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -94,15 +94,7 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast auto syntax = TreeRewriter(context).analyze(expr_list, columns.getAllPhysical()); result.expression = ExpressionAnalyzer(expr_list, syntax, context).getActions(true); - Block block_without_columns = result.expression->getSampleBlock(); - - for (size_t i = 0; i < block_without_columns.columns(); ++i) - { - const auto & column = block_without_columns.getByPosition(i); - result.column_names.emplace_back(column.name); - result.data_types.emplace_back(column.type); - result.sample_block.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name)); - } + result.sample_block = result.expression->getSampleBlock(); const auto & definition_arguments = index_definition->type->arguments; if (definition_arguments) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp 
b/src/Storages/MergeTree/MergeTreeData.cpp index 0d4e54453d7..f49ecba1385 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -524,7 +524,6 @@ void MergeTreeData::checkProperties( for (const auto & index : new_metadata.secondary_indices) { - MergeTreeIndexFactory::instance().validate(index, attach); if (indices_names.find(index.name) != indices_names.end()) diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index 2be9ecd8de3..6ae96d00171 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -35,6 +35,7 @@ MergeTreeIndexPtr MergeTreeIndexFactory::get( { auto it = creators.find(index.type); if (it == creators.end()) + { throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown Index type '{}'. Available index types: {}", index.type, std::accumulate(creators.cbegin(), creators.cend(), std::string{}, @@ -46,6 +47,7 @@ MergeTreeIndexPtr MergeTreeIndexFactory::get( return left + ", " + right.first; }) ); + } return it->second(index); } @@ -61,8 +63,31 @@ MergeTreeIndices MergeTreeIndexFactory::getMany(const std::vectorhasArrayJoin()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Secondary index '{}' cannot contain array joins", index.name); + + try + { + index.expression->assertDeterministic(); + } + catch (Exception & e) + { + e.addMessage(fmt::format("for secondary index '{}'", index.name)); + throw; + } + + for (const auto & elem : index.sample_block) + if (elem.column && (isColumnConst(*elem.column) || elem.column->isDummy())) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Secondary index '{}' cannot contain constants", index.name); + } + auto it = validators.find(index.type); if (it == validators.end()) + { throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown Index type '{}'. 
Available index types: {}", index.type, std::accumulate( @@ -77,6 +102,7 @@ void MergeTreeIndexFactory::validate(const IndexDescription & index, bool attach return left + ", " + right.first; }) ); + } it->second(index, attach); } diff --git a/tests/queries/0_stateless/02670_constant_skip_index.reference b/tests/queries/0_stateless/02670_constant_skip_index.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02670_constant_skip_index.sql b/tests/queries/0_stateless/02670_constant_skip_index.sql new file mode 100644 index 00000000000..97dd2ab33c9 --- /dev/null +++ b/tests/queries/0_stateless/02670_constant_skip_index.sql @@ -0,0 +1,25 @@ + +DROP TABLE IF EXISTS t_constant_index; + +CREATE TABLE t_constant_index +( + id UInt64, + INDEX t_constant_index 'foo' TYPE set(2) GRANULARITY 1 +) ENGINE = MergeTree +ORDER BY id; -- { serverError INCORRECT_QUERY } + +CREATE TABLE t_constant_index +( + id UInt64, + INDEX t_constant_index id + rand() TYPE set(2) GRANULARITY 1 +) ENGINE = MergeTree +ORDER BY id; -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t_constant_index +( + id UInt64, + INDEX t_constant_index id * 2 TYPE set(2) GRANULARITY 1 +) ENGINE = MergeTree +ORDER BY id; + +DROP TABLE t_constant_index; From cad1e0b7684a691116cef95f1c418df8ef4be740 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 25 Feb 2023 01:18:34 +0100 Subject: [PATCH 085/333] fix --- src/IO/WriteBufferFromPocoSocket.cpp | 11 +++++--- src/IO/WriteBufferFromPocoSocket.h | 1 + src/Interpreters/executeQuery.cpp | 15 ++++++----- src/QueryPipeline/BlockIO.cpp | 25 ++++++++++++++++++- src/QueryPipeline/BlockIO.h | 3 ++- src/Server/TCPHandler.cpp | 16 +++++++++--- src/Storages/Distributed/DistributedSink.cpp | 20 +++++++++++++++ src/Storages/Distributed/DistributedSink.h | 2 ++ .../test_insert_into_distributed/test.py | 2 +- .../02434_cancel_insert_when_client_dies.sh | 13 +++++++--- .../02435_rollback_cancelled_queries.sh | 2 +- utils/check-mysql-binlog/main.cpp | 2 +- 12 files changed, 89 insertions(+), 23 deletions(-) diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index e01245849ae..b15149c6f88 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -64,7 +64,8 @@ void WriteBufferFromPocoSocket::nextImpl() } catch (const Poco::Net::NetException & e) { - throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while writing to socket ({})", e.displayText(), peer_address.toString()); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while writing to socket ({} -> {})", e.displayText(), + our_address.toString(), peer_address.toString()); } catch (const Poco::TimeoutException &) { @@ -74,18 +75,20 @@ void WriteBufferFromPocoSocket::nextImpl() } catch (const Poco::IOException & e) { - throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while writing to socket ({})", e.displayText(), peer_address.toString()); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while writing to socket ({} -> {})", e.displayText(), + our_address.toString(), peer_address.toString()); } if (res < 0) - throw NetException(ErrorCodes::CANNOT_WRITE_TO_SOCKET, "Cannot write to socket ({})", peer_address.toString()); + throw NetException(ErrorCodes::CANNOT_WRITE_TO_SOCKET, "Cannot write to socket ({} -> {})", + our_address.toString(), peer_address.toString()); bytes_written += res; } } WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size) - : BufferWithOwnMemory(buf_size), 
socket(socket_), peer_address(socket.peerAddress()) + : BufferWithOwnMemory(buf_size), socket(socket_), peer_address(socket.peerAddress()), our_address(socket.address()) { } diff --git a/src/IO/WriteBufferFromPocoSocket.h b/src/IO/WriteBufferFromPocoSocket.h index 295ca16ecaf..ffe1176c8cd 100644 --- a/src/IO/WriteBufferFromPocoSocket.h +++ b/src/IO/WriteBufferFromPocoSocket.h @@ -28,6 +28,7 @@ protected: * (getpeername will return an error). */ Poco::Net::SocketAddress peer_address; + Poco::Net::SocketAddress our_address; }; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index f3d83a37c59..5e1e4cb58fb 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -176,7 +176,7 @@ static void setExceptionStackTrace(QueryLogElement & elem) /// Log exception (with query info) into text log (not into system table). -static void logException(ContextPtr context, QueryLogElement & elem) +static void logException(ContextPtr context, QueryLogElement & elem, bool log_error = true) { String comment; if (!elem.log_comment.empty()) @@ -187,7 +187,7 @@ static void logException(ContextPtr context, QueryLogElement & elem) PreformattedMessage message; message.format_string = elem.exception_format_string; - if (elem.stack_trace.empty()) + if (elem.stack_trace.empty() || !log_error) message.text = fmt::format("{} (from {}){} (in query: {})", elem.exception, context->getClientInfo().current_address.toString(), comment, @@ -201,7 +201,10 @@ static void logException(ContextPtr context, QueryLogElement & elem) toOneLineQuery(elem.query), elem.stack_trace); - LOG_ERROR(&Poco::Logger::get("executeQuery"), message); + if (log_error) + LOG_ERROR(&Poco::Logger::get("executeQuery"), message); + else + LOG_INFO(&Poco::Logger::get("executeQuery"), message); } static void onExceptionBeforeStart( @@ -1101,7 +1104,7 @@ static std::tuple executeQueryImpl( quota(quota), status_info_to_query_log, implicit_txn_control, - query_span]() mutable + query_span](bool log_error) mutable { if (implicit_txn_control) { @@ -1139,9 +1142,9 @@ static std::tuple executeQueryImpl( elem.query_duration_ms = start_watch.elapsedMilliseconds(); } - if (current_settings.calculate_text_stack_trace) + if (current_settings.calculate_text_stack_trace && log_error) setExceptionStackTrace(elem); - logException(context, elem); + logException(context, elem, log_error); /// In case of exception we log internal queries also if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) diff --git a/src/QueryPipeline/BlockIO.cpp b/src/QueryPipeline/BlockIO.cpp index 9af7cd2b772..231c369707e 100644 --- a/src/QueryPipeline/BlockIO.cpp +++ b/src/QueryPipeline/BlockIO.cpp @@ -4,6 +4,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int QUERY_WAS_CANCELLED; +} void BlockIO::reset() { @@ -58,7 +62,26 @@ void BlockIO::onFinish() void BlockIO::onException() { if (exception_callback) - exception_callback(); + exception_callback(/* log_error */ true); + + pipeline.reset(); +} + +void BlockIO::onCancelOrConnectionLoss() +{ + /// Query was not finished gracefully, so we should call exception_callback + /// But we don't have a real exception + if (exception_callback) + { + try + { + throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled or a client has unexpectedly dropped the connection"); + } + catch (...) 
+ { + exception_callback(/* log_error */ false); + } + } pipeline.reset(); } diff --git a/src/QueryPipeline/BlockIO.h b/src/QueryPipeline/BlockIO.h index 4c8d29d0ba8..ff85a0d6772 100644 --- a/src/QueryPipeline/BlockIO.h +++ b/src/QueryPipeline/BlockIO.h @@ -26,13 +26,14 @@ struct BlockIO /// Callbacks for query logging could be set here. std::function finish_callback; - std::function exception_callback; + std::function exception_callback; /// When it is true, don't bother sending any non-empty blocks to the out stream bool null_format = false; void onFinish(); void onException(); + void onCancelOrConnectionLoss(); /// Set is_all_data_sent in system.processes for this query. void setAllDataSent() const; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 1f06f6a4ae9..9f53e418aec 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -416,17 +416,25 @@ void TCPHandler::runImpl() after_check_cancelled.restart(); after_send_progress.restart(); + auto finish_or_cancel = [this]() + { + if (state.is_cancelled) + state.io.onCancelOrConnectionLoss(); + else + state.io.onFinish(); + }; + if (state.io.pipeline.pushing()) { /// FIXME: check explicitly that insert query suggests to receive data via native protocol, state.need_receive_data_for_insert = true; processInsertQuery(); - state.io.onFinish(); + finish_or_cancel(); } else if (state.io.pipeline.pulling()) { processOrdinaryQueryWithProcessors(); - state.io.onFinish(); + finish_or_cancel(); } else if (state.io.pipeline.completed()) { @@ -455,7 +463,7 @@ void TCPHandler::runImpl() executor.execute(); } - state.io.onFinish(); + finish_or_cancel(); std::lock_guard lock(task_callback_mutex); @@ -469,7 +477,7 @@ void TCPHandler::runImpl() } else { - state.io.onFinish(); + finish_or_cancel(); } /// Do it before sending end of stream, to have a chance to show log message in client. diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index bac13ea37cf..b2bbd02f879 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -569,6 +569,26 @@ void DistributedSink::onFinish() } } +void DistributedSink::onCancel() +{ + if (pool && !pool->finished()) + { + try + { + pool->wait(); + } + catch (...) 
+ {
+ tryLogCurrentException(storage.log);
+ }
+ }
+
+ for (auto & shard_jobs : per_shard_jobs)
+ for (JobReplica & job : shard_jobs.replicas_jobs)
+ if (job.executor)
+ job.executor->cancel();
+}
+
 IColumn::Selector DistributedSink::createSelector(const Block & source_block) const
 {

diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h
index af0c64cbd78..325d5859289 100644
--- a/src/Storages/Distributed/DistributedSink.h
+++ b/src/Storages/Distributed/DistributedSink.h
@@ -54,6 +54,8 @@ public:
 void onFinish() override;
 private:
+ void onCancel() override;
+
 IColumn::Selector createSelector(const Block & source_block) const;
 void writeAsync(const Block & block);

diff --git a/tests/integration/test_insert_into_distributed/test.py b/tests/integration/test_insert_into_distributed/test.py
index a52809f817c..3bee2149387 100644
--- a/tests/integration/test_insert_into_distributed/test.py
+++ b/tests/integration/test_insert_into_distributed/test.py
@@ -288,7 +288,7 @@ def test_inserts_single_replica_no_internal_replication(started_cluster):
 "prefer_localhost_replica": "0",
 },
 )
- assert node2.query("SELECT count(*) FROM single_replicated").strip() == "1"
+ assert node2.query("SELECT count(*) FROM single_replicated").strip() == "0"
 finally:
 node2.query("TRUNCATE TABLE single_replicated")

diff --git a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
index 6cae90a3cc3..2e653f2f1a3 100755
--- a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
+++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
@@ -11,20 +11,25 @@ export TEST_MARK="02434_insert_${CLICKHOUSE_DATABASE}_"
 $CLICKHOUSE_CLIENT -q 'select * from numbers(5000000) format TSV' > $DATA_FILE
 $CLICKHOUSE_CLIENT -q 'create table dedup_test(A Int64) Engine = MergeTree order by A settings non_replicated_deduplication_window=1000;'
+$CLICKHOUSE_CLIENT -q "create table dedup_dist(A Int64) Engine = Distributed('test_cluster_one_shard_two_replicas', currentDatabase(), dedup_test)"
 function insert_data
 {
 SETTINGS="query_id=$ID&max_insert_block_size=110000&min_insert_block_size_rows=110000"
 # max_block_size=10000, so external table will contain smaller blocks that will be squashed on insert-select (more chances to catch a bug on query cancellation)
 TRASH_SETTINGS="query_id=$ID&input_format_parallel_parsing=0&max_threads=1&max_insert_threads=1&max_insert_block_size=110000&max_block_size=10000&min_insert_block_size_bytes=0&min_insert_block_size_rows=110000&max_insert_block_size=110000"
- TYPE=$(( RANDOM % 4 ))
+ TYPE=$(( RANDOM % 5 ))
 if [[ "$TYPE" -eq 0 ]]; then
 # client will send 10000-rows blocks, server will squash them into 110000-rows blocks (more chances to catch a bug on query cancellation)
- $CLICKHOUSE_CLIENT --max_block_size=10000 --max_insert_block_size=10000 --query_id="$ID" -q 'insert into dedup_test settings max_insert_block_size=110000, min_insert_block_size_rows=110000 format TSV' < $DATA_FILE
+ $CLICKHOUSE_CLIENT --max_block_size=10000 --max_insert_block_size=10000 --query_id="$ID" \
+ -q 'insert into dedup_test settings max_insert_block_size=110000, min_insert_block_size_rows=110000 format TSV' < $DATA_FILE
 elif [[ "$TYPE" -eq 1 ]]; then
- $CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE
+ $CLICKHOUSE_CLIENT --max_block_size=10000 --max_insert_block_size=10000 --query_id="$ID" --prefer_localhost_replica="$(( RANDOM % 2))" \
+ -q 'insert into dedup_dist settings max_insert_block_size=110000, min_insert_block_size_rows=110000 format TSV' < $DATA_FILE
 elif [[ "$TYPE" -eq 2 ]]; then
+ $CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE
+ elif [[ "$TYPE" -eq 3 ]]; then
 $CLICKHOUSE_CURL -sS -X POST -H "Transfer-Encoding: chunked" --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE
 else
 $CLICKHOUSE_CURL -sS -F 'file=@-' "$CLICKHOUSE_URL&$TRASH_SETTINGS&file_format=TSV&file_types=UInt64" -X POST --form-string 'query=insert into dedup_test select * from file' < $DATA_FILE
@@ -73,7 +78,7 @@ export -f thread_insert;
 export -f thread_select;
 export -f thread_cancel;
-TIMEOUT=40 # 10 seconds for each TYPE
+TIMEOUT=40
 timeout $TIMEOUT bash -c thread_insert &
 timeout $TIMEOUT bash -c thread_select &

diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh
index 18317960cdf..a69e526c1c8 100755
--- a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh
+++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh
@@ -114,6 +114,6 @@ $CLICKHOUSE_CLIENT --implicit_transaction=1 -q 'select throwIf(count() % 1000000
 # So use this query to check that thread_cancel do something
 $CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and (
 message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes') or
- message like '%Connection reset by peer%')"
+ message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%')"
 $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=0 -q "drop table dedup_test"

diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp
index cf2a27e8aac..68558340180 100644
--- a/utils/check-mysql-binlog/main.cpp
+++ b/utils/check-mysql-binlog/main.cpp
@@ -18,7 +18,7 @@ static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody(
 {
 DB::MySQLReplication::BinlogEventPtr event;
 DB::ReadBufferPtr limit_read_buffer = std::make_shared(payload, header.event_size - 19,
- /* trow_exception */ false, /* exact_limit */ {});
+ /* trow_exception */ false, /* exact_limit */ std::nullopt);
 DB::ReadBufferPtr event_payload = std::make_shared(*limit_read_buffer, exist_checksum ? 4 : 0);
 switch (header.type)

From 940035f1086a7e7c6bdb375304d9166024ebddd4 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Sat, 25 Feb 2023 01:31:38 +0100
Subject: [PATCH 086/333] fix

---
 .../queries/0_stateless/02434_cancel_insert_when_client_dies.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
index 2e653f2f1a3..2a17095b267 100755
--- a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
+++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
@@ -40,6 +40,7 @@ export -f insert_data
 ID="02434_insert_init_${CLICKHOUSE_DATABASE}_$RANDOM"
 insert_data
+$CLICKHOUSE_CLIENT -q "system flush distributed dedup_dist"
 $CLICKHOUSE_CLIENT -q 'select count() from dedup_test'
 function thread_insert

From 4b5d62d5bf34b534693b9f8f579ed87ecf25f1e5 Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Sun, 26 Feb 2023 22:19:01 +0000
Subject: [PATCH 087/333] Analyzer: trivial count optimization

---
 src/Planner/PlannerJoinTree.cpp | 126 ++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)

diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 59b09f91888..36eb5c59ef5 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -39,6 +39,14 @@
 #include
 #include
 #include
+#include
+#include
+#include "Analyzer/AggregationUtils.h"
+#include "Analyzer/FunctionNode.h"
+#include
+#include
+#include
+#include

 namespace DB
 {
@@ -143,6 +151,119 @@ NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage
 return result;
 }
+bool applyTrivialCountIfPossible(
+ QueryPlan & query_plan,
+ const TableNode & table_node,
+ const SelectQueryInfo & select_query_info,
+ const ContextPtr & query_context,
+ const Names & columns_names)
+{
+ const auto & settings = query_context->getSettingsRef();
+ if (!settings.optimize_trivial_count_query)
+ return false;
+
+ const auto & storage = table_node.getStorage();
+ const auto & storage_snapshot = table_node.getStorageSnapshot();
+
+ bool can_apply_trivial_count = (settings.max_parallel_replicas <= 1) && //
+ !settings.allow_experimental_query_deduplication && //
+ !settings.empty_result_for_aggregation_by_empty_set && //
+ storage && //
+ storage->getName() != "MaterializedMySQL" && //
+ !storage->hasLightweightDeletedMask() && //
+ select_query_info.filter_asts.empty() && // ???
+ select_query_info.has_aggregates;
+ if (!can_apply_trivial_count)
+ return false;
+
+ QueryTreeNodes aggregates = collectAggregateFunctionNodes(select_query_info.query_tree);
+ if (aggregates.size() != 1)
+ return false;
+
+ auto & main_query_node = select_query_info.query_tree->as();
+ /// dump main query tree
+ {
+ WriteBufferFromOwnString buffer;
+ IQueryTreeNode::FormatState format_state;
+ main_query_node.dumpTreeImpl(buffer, format_state, 0);
+
+ LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), "main_query_node:\n{}", buffer.str());
+ LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), "Projection column:\n{}", main_query_node.getProjectionColumns().front().dump());
+ }
+
+ {
+ WriteBufferFromOwnString buffer;
+ buffer << columns_names;
+ LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), "{}", buffer.str());
+ }
+
+ const auto * function_node = typeid_cast(aggregates.front().get());
+ if (!function_node)
+ return false;
+
+ if (!function_node->getAggregateFunction())
+ return false;
+
+ LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), "Aggregation: {}", function_node->getFunctionName());
+
+ const auto * count_func = typeid_cast(function_node->getAggregateFunction().get());
+ if (!count_func)
+ return false;
+
+ /// get number of rows
+ std::optional num_rows{};
+ // if (!query_tree. prewhere() && !query.where() && !context->getCurrentTransaction())
+ if (!main_query_node.hasPrewhere() && !main_query_node.hasWhere())
+ {
+ num_rows = storage->totalRows(settings);
+ }
+ // else // It's possible to optimize count() given only partition predicates
+ // {
+ //     SelectQueryInfo temp_query_info;
+ //     temp_query_info.query = query_ptr;
+ //     temp_query_info.syntax_analyzer_result = syntax_analyzer_result;
+ //     temp_query_info.prepared_sets = query_analyzer->getPreparedSets();
+ //     num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, context);
+ // }
+
+ if (!num_rows)
+ return false;
+
+ LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), "Number of rows: {}", num_rows.value());
+
+ /// set aggregation state
+ const AggregateFunctionCount & agg_count = *count_func;
+ std::vector state(agg_count.sizeOfData());
+ AggregateDataPtr place = state.data();
+ agg_count.create(place);
+ // SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place));
+ agg_count.set(place, num_rows.value());
+
+ auto column = ColumnAggregateFunction::create(function_node->getAggregateFunction());
+ column->insertFrom(place);
+
+ /// get count() argument type
+ DataTypes argument_types;
+ argument_types.reserve(columns_names.size());
+ {
+ const Block source_header = storage_snapshot->getSampleBlockForColumns(columns_names);
+ for (const auto & column_name : columns_names)
+ argument_types.push_back(source_header.getByName(column_name).type);
+ }
+
+ Block block_with_count{
+ {std::move(column),
+ std::make_shared(function_node->getAggregateFunction(), argument_types, Array{}),
+ columns_names.front()}};
+
+ auto source = std::make_shared(block_with_count);
+ auto prepared_count = std::make_unique(Pipe(std::move(source)));
+ prepared_count->setStepDescription("Optimized trivial count");
+ query_plan.addStep(std::move(prepared_count));
+
+ return true;
+}
+
 JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & table_expression,
 const SelectQueryInfo & select_query_info,
 const SelectQueryOptions & select_query_options,
@@ -287,6 +408,11 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl
 table_expression_data.addColumn(additional_column_to_read, column_identifier);
 }
+ /// apply trivial_count optimization if possible
+ if (is_single_table_expression && table_node
+ && applyTrivialCountIfPossible(query_plan, *table_node, select_query_info, planner_context->getQueryContext(), columns_names))
+ return {std::move(query_plan), from_stage};
+
 bool need_rewrite_query_with_final = storage->needRewriteQueryWithFinal(columns_names);
 if (need_rewrite_query_with_final)
 {

From 1e4d9e2421091468bddcce8f59191b87865a05c3 Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Sun, 26 Feb 2023 22:37:06 +0000
Subject: [PATCH 088/333] try to fix

---
 src/Planner/PlannerJoinTree.cpp | 50 ++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 36eb5c59ef5..593bdd7dece 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -408,11 +408,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl
 table_expression_data.addColumn(additional_column_to_read, column_identifier);
 }
- /// apply trivial_count optimization if possible
- if (is_single_table_expression && table_node
- && applyTrivialCountIfPossible(query_plan, *table_node, select_query_info, planner_context->getQueryContext(), columns_names))
- return {std::move(query_plan), from_stage};
-
 bool need_rewrite_query_with_final = storage->needRewriteQueryWithFinal(columns_names);
 if (need_rewrite_query_with_final)
 {
@@ -434,28 +429,39 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl
 }
 }
- storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams);
+ /// apply trivial_count optimization if possible
+ bool is_trivial_count_applied = (is_single_table_expression && table_node
+ && applyTrivialCountIfPossible(query_plan, *table_node, select_query_info, planner_context->getQueryContext(), columns_names));
- if (query_plan.isInitialized())
+ if (is_trivial_count_applied)
 {
- /** Specify the number of threads only if it wasn't specified in storage.
- *
- * But in case of remote query and prefer_localhost_replica=1 (default)
- * The inner local query (that is done in the same process, without
- * network interaction), it will setMaxThreads earlier and distributed
- * query will not update it.
- */
- if (!query_plan.getMaxThreads() || is_remote)
- query_plan.setMaxThreads(max_threads_execute_query);
+ from_stage = QueryProcessingStage::WithMergeableState;
 }
 else
 {
- /// Create step which reads from empty source if storage has no data.
- auto source_header = storage_snapshot->getSampleBlockForColumns(columns_names);
- Pipe pipe(std::make_shared(source_header));
- auto read_from_pipe = std::make_unique(std::move(pipe));
- read_from_pipe->setStepDescription("Read from NullSource");
- query_plan.addStep(std::move(read_from_pipe));
+ storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams);
+
+ if (query_plan.isInitialized())
+ {
+ /** Specify the number of threads only if it wasn't specified in storage.
+ *
+ * But in case of remote query and prefer_localhost_replica=1 (default)
+ * The inner local query (that is done in the same process, without
+ * network interaction), it will setMaxThreads earlier and distributed
+ * query will not update it.
+ */
+ if (!query_plan.getMaxThreads() || is_remote)
+ query_plan.setMaxThreads(max_threads_execute_query);
+ }
+ else
+ {
+ /// Create step which reads from empty source if storage has no data.
+ auto source_header = storage_snapshot->getSampleBlockForColumns(columns_names);
+ Pipe pipe(std::make_shared(source_header));
+ auto read_from_pipe = std::make_unique(std::move(pipe));
+ read_from_pipe->setStepDescription("Read from NullSource");
+ query_plan.addStep(std::move(read_from_pipe));
+ }
 }
 }
 else if (query_node || union_node)

From f9a324c47a50e67dfe1933677924e08f921a40e7 Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Mon, 27 Feb 2023 09:48:24 +0000
Subject: [PATCH 089/333] Make diagnostic traces debug level

---
 src/Planner/PlannerJoinTree.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 593bdd7dece..b1e22b40d69 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -187,14 +187,14 @@ bool applyTrivialCountIfPossible(
 IQueryTreeNode::FormatState format_state;
 main_query_node.dumpTreeImpl(buffer, format_state, 0);
- LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), "main_query_node:\n{}", buffer.str());
- LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), "Projection column:\n{}", main_query_node.getProjectionColumns().front().dump());
+ LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "main_query_node:\n{}", buffer.str());
+ LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "Projection column:\n{}", main_query_node.getProjectionColumns().front().dump());
 }
 {
 WriteBufferFromOwnString buffer;
 buffer << columns_names;
- LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), "{}", buffer.str());
+ LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "{}", buffer.str());
 }
 const auto * function_node = typeid_cast(aggregates.front().get());
@@ -204,7 +204,7 @@ bool applyTrivialCountIfPossible(
 if (!function_node->getAggregateFunction())
 return false;
- LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), "Aggregation: {}", function_node->getFunctionName());
+ LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "Aggregation: {}", function_node->getFunctionName());
 const auto * count_func = typeid_cast(function_node->getAggregateFunction().get());
 if (!count_func)
@@ -229,7 +229,7 @@ bool applyTrivialCountIfPossible(
 if (!num_rows)
 return false;
- LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), "Number of rows: {}", num_rows.value());
+ LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "Number of rows: {}", num_rows.value());
 /// set aggregation state
 const AggregateFunctionCount & agg_count = *count_func;

From cac9e96d0415f52d27a96c873c981f37bd8a2ed7 Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Mon, 27 Feb 2023 10:56:59 +0000
Subject: [PATCH 090/333] Fix: do not apply if FINAL

---
 src/Planner/PlannerJoinTree.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index b1e22b40d69..9c005314421 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -162,6 +162,10 @@ bool applyTrivialCountIfPossible(
 if (!settings.optimize_trivial_count_query)
 return false;
+ /// can't apply if FINAL
+ if (select_query_info.table_expression_modifiers.has_value() && select_query_info.table_expression_modifiers.value().hasFinal())
+ return false;
+
 const auto & storage = table_node.getStorage();
 const auto & storage_snapshot = table_node.getStorageSnapshot();
@@ -430,8 +434,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl
 }
 /// apply trivial_count optimization if possible
- bool is_trivial_count_applied = (is_single_table_expression && table_node
- && applyTrivialCountIfPossible(query_plan, *table_node, select_query_info, planner_context->getQueryContext(), columns_names));
+ bool is_trivial_count_applied = is_single_table_expression && table_node
+ && applyTrivialCountIfPossible(query_plan, *table_node, select_query_info, planner_context->getQueryContext(), columns_names);
 if (is_trivial_count_applied)
 {

From 67099f2a65a78e8dc898510a4f700b36603fd067 Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Mon, 27 Feb 2023 12:53:09 +0000
Subject: [PATCH 091/333] Tests

---
 .../02674_trivial_count_analyzer.reference    | 48 +++++++++++++++++++
 .../02674_trivial_count_analyzer.sql          | 46 ++++++++++++++++++
 2 files changed, 94 insertions(+)
 create mode 100644 tests/queries/0_stateless/02674_trivial_count_analyzer.reference
 create mode 100644 tests/queries/0_stateless/02674_trivial_count_analyzer.sql

diff --git a/tests/queries/0_stateless/02674_trivial_count_analyzer.reference b/tests/queries/0_stateless/02674_trivial_count_analyzer.reference
new file mode 100644
index 00000000000..2a94fd59d7b
--- /dev/null
+++ b/tests/queries/0_stateless/02674_trivial_count_analyzer.reference
@@ -0,0 +1,48 @@
+-- { echoOn }
+set allow_experimental_analyzer=1;
+set optimize_trivial_count_query=1;
+create table m3(a Int64, b UInt64) Engine=MergeTree order by tuple();
+select count() from m3;
+0
+insert into m3 values (0,0);
+insert into m3 values (-1,1);
+select trimBoth(explain) from (explain select count() from m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%';
+ReadFromPreparedSource (Optimized trivial count)
+select count() from m3;
+2
+select count(*) from m3;
+2
+select count(a) from m3;
+2
+select count(b) from m3;
+2
+select count() + 1 from m3;
+3
+-- drop table m3;
+
+-- checking queries with FINAL
+create table replacing_m3(a Int64, b UInt64) Engine=ReplacingMergeTree() order by (a, b);
+SYSTEM STOP MERGES replacing_m3;
+select count() from replacing_m3;
+0
+insert into replacing_m3 values (0,0);
+insert into replacing_m3 values (0,0);
+insert into replacing_m3 values (-1,1);
+insert into replacing_m3 values (-2,2);
+select trimBoth(explain) from (explain select count() from replacing_m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%';
+ReadFromPreparedSource (Optimized trivial count)
+select count() from replacing_m3;
+4
+select count(*) from replacing_m3;
+4
+select count(a) from replacing_m3;
+4
+select count(b) from replacing_m3;
+4
+set optimize_trivial_count_query=0; -- FIXME: wrong result for queries with FINAL
+select count() from replacing_m3 FINAL;
+3
+select count(a) from replacing_m3 FINAL;
+3
+select count(b) from replacing_m3 FINAL;
+3
diff --git a/tests/queries/0_stateless/02674_trivial_count_analyzer.sql b/tests/queries/0_stateless/02674_trivial_count_analyzer.sql
new file mode 100644
index 00000000000..d4a686e6eff
--- /dev/null
+++ b/tests/queries/0_stateless/02674_trivial_count_analyzer.sql
@@ -0,0 +1,46 @@
+drop table if exists m3;
+drop table if exists replacing_m3;
+
+-- { echoOn }
+set allow_experimental_analyzer=1;
+set optimize_trivial_count_query=1;
+
+create table m3(a Int64, b UInt64) Engine=MergeTree order by tuple();
+
+select count() from m3;
+
+insert into m3 values (0,0);
+insert into m3 values (-1,1);
+
+select trimBoth(explain) from (explain select count() from m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%';
+select count() from m3;
+select count(*) from m3;
+select count(a) from m3;
+select count(b) from m3;
+select count() + 1 from m3;
+
+-- drop table m3;
+
+-- checking queries with FINAL
+create table replacing_m3(a Int64, b UInt64) Engine=ReplacingMergeTree() order by (a, b);
+SYSTEM STOP MERGES replacing_m3;
+
+select count() from replacing_m3;
+
+insert into replacing_m3 values (0,0);
+insert into replacing_m3 values (0,0);
+insert into replacing_m3 values (-1,1);
+insert into replacing_m3 values (-2,2);
+
+select trimBoth(explain) from (explain select count() from replacing_m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%';
+select count() from replacing_m3;
+select count(*) from replacing_m3;
+select count(a) from replacing_m3;
+select count(b) from replacing_m3;
+
+set optimize_trivial_count_query=0; -- FIXME: wrong result for queries with FINAL
+select count() from replacing_m3 FINAL;
+select count(a) from replacing_m3 FINAL;
+select count(b) from replacing_m3 FINAL;
+
+-- drop table replacing_m3;

From 7684153100907ad3987980a66b79824158d10dac Mon Sep 17 00:00:00 2001
From: artem-yadr <84010375+artem-yadr@users.noreply.github.com>
Date: Mon, 27 Feb 2023 16:18:08 +0300
Subject: [PATCH 092/333] Redeclaration error fix

---
 base/poco/MongoDB/src/Connection.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/poco/MongoDB/src/Connection.cpp b/base/poco/MongoDB/src/Connection.cpp
index c75211cf9d7..38c31d2250a 100644
--- a/base/poco/MongoDB/src/Connection.cpp
+++ b/base/poco/MongoDB/src/Connection.cpp
@@ -237,7 +237,7 @@ void Connection::connect(const std::string& uri, SocketFactory& socketFactory)
 for (std::vector::const_iterator it = strAddresses.cbegin();it != strAddresses.cend(); ++it)
 {
 newURI = *it;
- Poco::URI theURI(newURI);
+ theURI = Poco::URI(newURI);
 std::string host = theURI.getHost();
 Poco::UInt16 port = theURI.getPort();

From 385beda8888dcabc895d0538914b3ce747c7ea67 Mon Sep 17 00:00:00 2001
From: artem-yadr <84010375+artem-yadr@users.noreply.github.com>
Date: Mon, 27 Feb 2023 16:25:12 +0300
Subject: [PATCH 093/333] Update base/poco/MongoDB/include/Poco/MongoDB/Connection.h

Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
---
 base/poco/MongoDB/include/Poco/MongoDB/Connection.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h
index 8fd9c7919e4..5c5f6f76638 100644
--- a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h
+++ b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h
@@ -91,7 +91,7 @@ namespace MongoDB
 Poco::Net::SocketAddress address() const;
 /// Returns the address of the MongoDB server.
- std::string uri() const;
+ const std::string & uri() const;
 /// Returns the uri on which the connection was made.
 void connect(const std::string & hostAndPort);

From 0de2eb8a795908132ed2b6eaaa4bd09fb76e9f2d Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Mon, 27 Feb 2023 14:45:39 +0000
Subject: [PATCH 094/333] Update version_date.tsv and changelogs after v22.8.14.53-lts

---
 docs/changelogs/v22.8.14.53-lts.md   | 40 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  1 +
 2 files changed, 41 insertions(+)
 create mode 100644 docs/changelogs/v22.8.14.53-lts.md

diff --git a/docs/changelogs/v22.8.14.53-lts.md b/docs/changelogs/v22.8.14.53-lts.md
new file mode 100644
index 00000000000..5978080fa3a
--- /dev/null
+++ b/docs/changelogs/v22.8.14.53-lts.md
@@ -0,0 +1,40 @@
+---
+sidebar_position: 1
+sidebar_label: 2023
+---
+
+# 2023 Changelog
+
+### ClickHouse release v22.8.14.53-lts (4ea67c40077) FIXME as compared to v22.8.13.20-lts (e4817946d18)
+
+#### Performance Improvement
+* Backported in [#45845](https://github.com/ClickHouse/ClickHouse/issues/45845): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#46374](https://github.com/ClickHouse/ClickHouse/issues/46374): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#46358](https://github.com/ClickHouse/ClickHouse/issues/46358): Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#46112](https://github.com/ClickHouse/ClickHouse/issues/46112): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#46482](https://github.com/ClickHouse/ClickHouse/issues/46482): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Backported in [#46505](https://github.com/ClickHouse/ClickHouse/issues/46505): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Backported in [#45908](https://github.com/ClickHouse/ClickHouse/issues/45908): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Backported in [#46238](https://github.com/ClickHouse/ClickHouse/issues/46238): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
+* Backported in [#45727](https://github.com/ClickHouse/ClickHouse/issues/45727): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)).
+* Backported in [#46394](https://github.com/ClickHouse/ClickHouse/issues/46394): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Backported in [#46442](https://github.com/ClickHouse/ClickHouse/issues/46442): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#46674](https://github.com/ClickHouse/ClickHouse/issues/46674): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#46879](https://github.com/ClickHouse/ClickHouse/issues/46879): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#46871](https://github.com/ClickHouse/ClickHouse/issues/46871): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 3814e94bf24..50937edb449 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -25,6 +25,7 @@ v22.9.4.32-stable 2022-10-26
 v22.9.3.18-stable 2022-09-30
 v22.9.2.7-stable 2022-09-23
 v22.9.1.2603-stable 2022-09-22
+v22.8.14.53-lts 2023-02-27
 v22.8.13.20-lts 2023-01-29
 v22.8.12.45-lts 2023-01-10
 v22.8.11.15-lts 2022-12-08

From 7b85b35534fd65fae459e5ce13b5328da7d6c6b0 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Mon, 27 Feb 2023 15:11:25 +0000
Subject: [PATCH 095/333] fix skip indexes

---
 src/Storages/IndicesDescription.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp
index 00a3636b605..591a9082f7a 100644
--- a/src/Storages/IndicesDescription.cpp
+++ b/src/Storages/IndicesDescription.cpp
@@ -96,6 +96,12 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast
 result.expression = ExpressionAnalyzer(expr_list, syntax, context).getActions(true);
 result.sample_block = result.expression->getSampleBlock();
+ for (const auto & elem : result.sample_block)
+ {
+ result.column_names.push_back(elem.name);
+ result.data_types.push_back(elem.type);
+ }
+
 const auto & definition_arguments = index_definition->type->arguments;
 if (definition_arguments)
 {

From 2e921e3d6b4abcdf61ff4eec0405038f528175c0 Mon Sep 17 00:00:00 2001
From: avogar
Date: Mon, 27 Feb 2023 16:00:19 +0000
Subject: [PATCH 096/333] Fix date and int inference from string in JSON

---
 src/Formats/SchemaInferenceUtils.cpp | 1 +
 src/Processors/Formats/ISchemaReader.h | 4 ++--
 .../02674_date_int_string_json_inference.reference | 1 +
 .../0_stateless/02674_date_int_string_json_inference.sql | 2 ++
 4 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/02674_date_int_string_json_inference.reference
 create mode 100644 tests/queries/0_stateless/02674_date_int_string_json_inference.sql

diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp
index 7a242a9f81c..00eb686385d 100644
--- a/src/Formats/SchemaInferenceUtils.cpp
+++ b/src/Formats/SchemaInferenceUtils.cpp
@@ -131,6 +131,7 @@ namespace
 type_indexes.erase(TypeIndex::Date);
 type_indexes.erase(TypeIndex::DateTime);
+ type_indexes.insert(TypeIndex::String);
 return;
 }
diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h
index edc5c6068c3..330acc99369 100644
--- a/src/Processors/Formats/ISchemaReader.h
+++ b/src/Processors/Formats/ISchemaReader.h
@@ -170,10 +170,10 @@ void chooseResultColumnType(
 ErrorCodes::TYPE_MISMATCH,
 "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. "
 "You can specify the type for this column using setting schema_inference_hints",
- type->getName(),
+ new_type->getName(),
 column_name,
 row,
- new_type->getName());
+ type->getName());
 }
 }
diff --git a/tests/queries/0_stateless/02674_date_int_string_json_inference.reference b/tests/queries/0_stateless/02674_date_int_string_json_inference.reference
new file mode 100644
index 00000000000..2e89d6a15a4
--- /dev/null
+++ b/tests/queries/0_stateless/02674_date_int_string_json_inference.reference
@@ -0,0 +1 @@
+x Nullable(String)
diff --git a/tests/queries/0_stateless/02674_date_int_string_json_inference.sql b/tests/queries/0_stateless/02674_date_int_string_json_inference.sql
new file mode 100644
index 00000000000..21abf763cbf
--- /dev/null
+++ b/tests/queries/0_stateless/02674_date_int_string_json_inference.sql
@@ -0,0 +1,2 @@
+desc format(JSONEachRow, '{"x" : "2020-01-01"}, {"x" : "1000"}')
+

From e01c9ff5ec080ca9537843e058ea9e0ccea42ac6 Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Mon, 27 Feb 2023 17:37:29 +0000
Subject: [PATCH 097/333] Fixes: group by, final

---
 src/Planner/PlannerJoinTree.cpp | 34 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 9c005314421..8b7489d52c8 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -154,7 +154,7 @@ NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage
 bool applyTrivialCountIfPossible(
 QueryPlan & query_plan,
 const TableNode & table_node,
- const SelectQueryInfo & select_query_info,
+ const QueryTreeNodePtr & query_tree,
 const ContextPtr & query_context,
 const Names & columns_names)
 {
@@ -163,28 +163,26 @@ bool applyTrivialCountIfPossible(
 return false;
 /// can't apply if FINAL
- if (select_query_info.table_expression_modifiers.has_value() && select_query_info.table_expression_modifiers.value().hasFinal())
+ if (table_node.getTableExpressionModifiers().has_value() && table_node.getTableExpressionModifiers()->hasFinal())
+ return false;
+
+ auto & main_query_node = query_tree->as();
+ if (main_query_node.hasGroupBy())
 return false;
 const auto & storage = table_node.getStorage();
- const auto & storage_snapshot = table_node.getStorageSnapshot();
-
- bool can_apply_trivial_count = (settings.max_parallel_replicas <= 1) && //
- !settings.allow_experimental_query_deduplication && //
- !settings.empty_result_for_aggregation_by_empty_set && //
- storage && //
- storage->getName() != "MaterializedMySQL" && //
- !storage->hasLightweightDeletedMask() && //
- select_query_info.filter_asts.empty() && // ???
- select_query_info.has_aggregates;
- if (!can_apply_trivial_count)
+ if (!storage || storage->hasLightweightDeletedMask())
 return false;
- QueryTreeNodes aggregates = collectAggregateFunctionNodes(select_query_info.query_tree);
+ if (settings.max_parallel_replicas > 1 || //
+ settings.allow_experimental_query_deduplication || //
+ settings.empty_result_for_aggregation_by_empty_set)
+ return false;
+
+ QueryTreeNodes aggregates = collectAggregateFunctionNodes(query_tree);
 if (aggregates.size() != 1)
 return false;
- auto & main_query_node = select_query_info.query_tree->as();
 /// dump main query tree
 {
 WriteBufferFromOwnString buffer;
@@ -250,7 +248,7 @@ bool applyTrivialCountIfPossible(
 DataTypes argument_types;
 argument_types.reserve(columns_names.size());
 {
- const Block source_header = storage_snapshot->getSampleBlockForColumns(columns_names);
+ const Block source_header = table_node.getStorageSnapshot()->getSampleBlockForColumns(columns_names);
 for (const auto & column_name : columns_names)
 argument_types.push_back(source_header.getByName(column_name).type);
 }
@@ -434,8 +432,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl
 }
 /// apply trivial_count optimization if possible
- bool is_trivial_count_applied = is_single_table_expression && table_node
- && applyTrivialCountIfPossible(query_plan, *table_node, select_query_info, planner_context->getQueryContext(), columns_names);
+ bool is_trivial_count_applied = is_single_table_expression && table_node && select_query_info.has_aggregates
+ && applyTrivialCountIfPossible(query_plan, *table_node, select_query_info.query_tree, planner_context->getQueryContext(), columns_names);
 if (is_trivial_count_applied)
 {

From ab899bf2f3220e07b4316c1db772eb2cde185087 Mon Sep 17 00:00:00 2001
From: avogar
Date: Mon, 27 Feb 2023 19:28:19 +0000
Subject: [PATCH 098/333] Allow types conversion in Native input format

---
 .../operations/settings/settings-formats.md   | 12 ++++++--
 src/Core/Settings.h                           |  2 ++
 src/Core/SettingsChangesHistory.h             |  3 +-
 src/DataTypes/DataTypeLowCardinality.h        |  2 +-
 .../DataTypeLowCardinalityHelpers.cpp         |  8 ++---
 src/Formats/FormatFactory.cpp                 |  1 +
 src/Formats/FormatSettings.h                  |  5 ++++
 src/Formats/NativeReader.cpp                  | 30 +++++++++++++++++--
 src/Formats/NativeReader.h                    |  2 ++
 src/Processors/Formats/Impl/NativeFormat.cpp  |  1 +
 .../Algorithms/AggregatingSortedAlgorithm.cpp |  2 +-
 .../Algorithms/SummingSortedAlgorithm.cpp     |  2 +-
 .../02567_native_type_conversions.reference   |  3 ++
 .../02567_native_type_conversions.sh          | 12 ++++++++
 14 files changed, 72 insertions(+), 13 deletions(-)
 create mode 100644 tests/queries/0_stateless/02567_native_type_conversions.reference
 create mode 100755 tests/queries/0_stateless/02567_native_type_conversions.sh

diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index c6eb8c1f2c0..a7a7d78ba4d 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -1468,7 +1468,7 @@ Default value: `65505`.
 The name of table that will be used in the output INSERT statement.
-Default value: `'table''`.
+Default value: `table`.
 ### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names}
@@ -1508,4 +1508,12 @@ Disabled by default.
 The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit.
-Default value: `1GiB`
+Default value: `1GiB`.
+
+## Native format settings {#native-format-settings}
+
+### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion}
+
+Allow types conversion in Native input format between columns from input data and requested columns.
+
+Enabled by default.
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index c1b396778ab..43207b517ca 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -819,6 +819,8 @@ class IColumn;
 M(UInt64, input_format_csv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in CSV format", 0) \
 M(UInt64, input_format_tsv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in TSV format", 0) \
 \
+ M(Bool, input_format_native_allow_types_conversion, true, "Allow data types conversion in Native input format", 0) \
+ \
 M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \
 M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
 \
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index c9d68b94a5e..b153372dd92 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -80,7 +80,8 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map settings_changes_history =
 {
- {"23.3", {{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}},
+ {"23.3", {{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"},
+ {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input format"}}},
 {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"},
 {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"},
 {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"},
diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h
index 57f67ddad7a..d301a0f5443 100644
--- a/src/DataTypes/DataTypeLowCardinality.h
+++ b/src/DataTypes/DataTypeLowCardinality.h
@@ -86,6 +86,6 @@ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type);
 ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column);
 /// Convert column of type from_type to type to_type by converting nested LowCardinality columns.
-ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type);
+ColumnPtr recursiveLowCardinalityTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type);
 }
diff --git a/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/src/DataTypes/DataTypeLowCardinalityHelpers.cpp
index 8a61afee420..98eb76267a4 100644
--- a/src/DataTypes/DataTypeLowCardinalityHelpers.cpp
+++ b/src/DataTypes/DataTypeLowCardinalityHelpers.cpp
@@ -113,7 +113,7 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
 return column;
 }
-ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
+ColumnPtr recursiveLowCardinalityTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
 {
 if (!column)
 return column;
@@ -128,7 +128,7 @@ ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr &
 if (const auto * column_const = typeid_cast(column.get()))
 {
 const auto & nested = column_const->getDataColumnPtr();
- auto nested_no_lc = recursiveTypeConversion(nested, from_type, to_type);
+ auto nested_no_lc = recursiveLowCardinalityTypeConversion(nested, from_type, to_type);
 if (nested.get() == nested_no_lc.get())
 return column;
@@ -164,7 +164,7 @@ ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr &
 const auto & nested_to = to_array_type->getNestedType();
 return ColumnArray::create(
- recursiveTypeConversion(column_array->getDataPtr(), nested_from, nested_to),
+ recursiveLowCardinalityTypeConversion(column_array->getDataPtr(), nested_from, nested_to),
 column_array->getOffsetsPtr());
 }
 }
@@ -187,7 +187,7 @@ ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr &
 for (size_t i = 0; i < columns.size(); ++i)
 {
 auto & element = columns[i];
- auto element_no_lc = recursiveTypeConversion(element, from_elements.at(i), to_elements.at(i));
+ auto element_no_lc = recursiveLowCardinalityTypeConversion(element, from_elements.at(i), to_elements.at(i));
 if (element.get() != element_no_lc.get())
 {
 element = element_no_lc;
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index a22926973ed..f9e704e0445 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -190,6 +190,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
 format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string;
 format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference;
 format_settings.max_binary_string_size = settings.format_binary_max_string_size;
+ format_settings.native.allow_types_conversion = settings.input_format_native_allow_types_conversion;
 format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth;
 format_settings.client_protocol_version = context->getClientProtocolVersion();
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index 5e95c629c7c..7ed0e7d069e 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -326,6 +326,11 @@ struct FormatSettings
 bool output_string_as_string;
 bool skip_fields_with_unsupported_types_in_schema_inference;
 } bson;
+
+ struct
+ {
+ bool allow_types_conversion = true;
+ } native;
 };
 }
diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp
index 9f8d4ba1930..eca88a41c13 100644
--- a/src/Formats/NativeReader.cpp
+++ b/src/Formats/NativeReader.cpp
@@ -15,6 +15,8 @@
 #include
 #include
+#include
+
 namespace DB
 {
@@ -39,12 +41,14 @@ NativeReader::NativeReader(
 UInt64 server_revision_,
 bool skip_unknown_columns_,
 bool null_as_default_,
+ bool allow_types_conversion_,
 BlockMissingValues * block_missing_values_)
 : istr(istr_)
 , header(header_)
 , server_revision(server_revision_)
 , skip_unknown_columns(skip_unknown_columns_)
 , null_as_default(null_as_default_)
+ , allow_types_conversion(allow_types_conversion_)
 , block_missing_values(block_missing_values_)
 {
 }
@@ -204,11 +208,31 @@ Block NativeReader::read()
 if (null_as_default)
 insertNullAsDefaultIfNeeded(column, header_column, header.getPositionByName(column.name), block_missing_values);
- /// Support insert from old clients without low cardinality type.
 if (!header_column.type->equals(*column.type))
 {
- column.column = recursiveTypeConversion(column.column, column.type, header.safeGetByPosition(i).type);
- column.type = header.safeGetByPosition(i).type;
+ if (allow_types_conversion)
+ {
+ try
+ {
+ column.column = castColumn(column, header_column.type);
+ }
+ catch (Exception & e)
+ {
+ e.addMessage(fmt::format(
+ "while converting column \"{}\" from type {} to type {}",
+ column.name,
+ column.type->getName(),
+ header_column.type->getName()));
+ throw;
+ }
+ }
+ else
+ {
+ /// Support insert from old clients without low cardinality type.
+ column.column = recursiveLowCardinalityTypeConversion(column.column, column.type, header_column.type);
+ }
+
+ column.type = header_column.type;
 }
 }
 else
diff --git a/src/Formats/NativeReader.h b/src/Formats/NativeReader.h
index 2d8b16e06eb..3cec4afd997 100644
--- a/src/Formats/NativeReader.h
+++ b/src/Formats/NativeReader.h
@@ -30,6 +30,7 @@ public:
 UInt64 server_revision_,
 bool skip_unknown_columns_ = false,
 bool null_as_default_ = false,
+ bool allow_types_conversion_ = false,
 BlockMissingValues * block_missing_values_ = nullptr);
 /// For cases when we have an index. It allows to skip columns. Only columns specified in the index will be read.
@@ -51,6 +52,7 @@ private:
 UInt64 server_revision;
 bool skip_unknown_columns = false;
 bool null_as_default = false;
+ bool allow_types_conversion = false;
 BlockMissingValues * block_missing_values = nullptr;
 bool use_index = false;
diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp
index bd1b13ce2ef..d3fd9ef73e1 100644
--- a/src/Processors/Formats/Impl/NativeFormat.cpp
+++ b/src/Processors/Formats/Impl/NativeFormat.cpp
@@ -23,6 +23,7 @@ public:
 0,
 settings.skip_unknown_fields,
 settings.null_as_default,
+ settings.native.allow_types_conversion,
 settings.defaults_for_omitted_fields ? &block_missing_values : nullptr))
 , header(header_) {}
diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp
index 560be60987b..ef103eb508c 100644
--- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp
@@ -117,7 +117,7 @@ static void postprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::Co
 {
 const auto & from_type = desc.nested_type;
 const auto & to_type = desc.real_type;
- columns[desc.column_number] = recursiveTypeConversion(columns[desc.column_number], from_type, to_type);
+ columns[desc.column_number] = recursiveLowCardinalityTypeConversion(columns[desc.column_number], from_type, to_type);
 }
 }
diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
index 0f1775d4ac0..d8e95e6b950 100644
--- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
@@ -450,7 +450,7 @@ static void postprocessChunk(
 {
 const auto & from_type = desc.nested_type;
 const auto & to_type = desc.real_type;
- res_columns[desc.column_numbers[0]] = recursiveTypeConversion(column, from_type, to_type);
+ res_columns[desc.column_numbers[0]] = recursiveLowCardinalityTypeConversion(column, from_type, to_type);
 }
 else
 res_columns[desc.column_numbers[0]] = std::move(column);
diff --git a/tests/queries/0_stateless/02567_native_type_conversions.reference b/tests/queries/0_stateless/02567_native_type_conversions.reference
new file mode 100644
index 00000000000..5c223870c11
--- /dev/null
+++ b/tests/queries/0_stateless/02567_native_type_conversions.reference
@@ -0,0 +1,3 @@
+1
+42
+1
diff --git a/tests/queries/0_stateless/02567_native_type_conversions.sh b/tests/queries/0_stateless/02567_native_type_conversions.sh
new file mode 100755
index 00000000000..976c42f07c1
--- /dev/null
+++ b/tests/queries/0_stateless/02567_native_type_conversions.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select 42::UInt8 as x format Native" | $CLICKHOUSE_LOCAL --structure="x UInt64" --input-format="Native" -q "select * from table" --input_format_native_allow_types_conversion=0 2>&1 | grep "TYPE_MISMATCH" -c + +$CLICKHOUSE_LOCAL -q "select 42::UInt8 as x format Native" | $CLICKHOUSE_LOCAL --structure="x UInt64" --input-format="Native" -q "select * from table" --input_format_native_allow_types_conversion=1 + +$CLICKHOUSE_LOCAL -q "select 'Hello' as x format Native" | $CLICKHOUSE_LOCAL --structure="x UInt64" --input-format="Native" -q "select * from table" --input_format_native_allow_types_conversion=1 2>&1 | grep 'while converting column "x" from type String to type UInt64' -c + From d5469c0ab49106ad55aef68a8eaaec17d59d4081 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 28 Feb 2023 00:07:50 +0100 Subject: [PATCH 099/333] fix clickhouse-test --- tests/clickhouse-test | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index cfd2546bbdd..06a30317346 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -73,7 +73,14 @@ def stringhash(s): # First and last lines of the log def trim_for_log(s): - return s + if not s: + return s + lines = s.splitlines() + if len(lines) > 10000: + separator = "-" * 40 + str(len(lines) - 10000) + " lines are hidden" + "-" * 40 + return "\n".join(lines[:5000] + [] + [separator] + [] + lines[-5000:]) + else: + return "\n".join(lines) class HTTPError(Exception): From 5fb1f20882677f66ca5c50decd01dcaae4677cab Mon Sep 17 00:00:00 2001 From: AndyB Date: Mon, 27 Feb 2023 23:16:34 +0000 Subject: [PATCH 100/333] LOG_DEBUG over LOG_INFO --- src/Interpreters/executeQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 435401796a0..f3c103f6af2 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -995,7 +995,7 @@ static std::tuple executeQueryImpl( { double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; double rows_per_second = static_cast(elem.read_rows) / elapsed_seconds; - LOG_INFO( + LOG_DEBUG( &Poco::Logger::get("executeQuery"), "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", elem.read_rows, From 42c5634ae4af14602250e2938d28d70c064d6247 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 28 Feb 2023 11:35:36 +0000 Subject: [PATCH 101/333] Remove debug logging --- src/Planner/PlannerJoinTree.cpp | 39 ++++++++++----------------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 8b7489d52c8..11944b4b71d 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -39,14 +39,14 @@ #include #include #include + #include -#include -#include "Analyzer/AggregationUtils.h" -#include "Analyzer/FunctionNode.h" -#include +#include +#include #include #include #include +#include namespace DB { @@ -183,22 +183,6 @@ bool applyTrivialCountIfPossible( if (aggregates.size() != 1) return false; - /// dump main query tree - { - WriteBufferFromOwnString buffer; - IQueryTreeNode::FormatState format_state; - main_query_node.dumpTreeImpl(buffer, format_state, 0); - - LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "main_query_node:\n{}", buffer.str()); - LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "Projection column:\n{}", 
main_query_node.getProjectionColumns().front().dump()); - } - - { - WriteBufferFromOwnString buffer; - buffer << columns_names; - LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "{}", buffer.str()); - } - const auto * function_node = typeid_cast(aggregates.front().get()); if (!function_node) return false; @@ -206,19 +190,22 @@ bool applyTrivialCountIfPossible( if (!function_node->getAggregateFunction()) return false; - LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "Aggregation: {}", function_node->getFunctionName()); - const auto * count_func = typeid_cast(function_node->getAggregateFunction().get()); if (!count_func) return false; /// get number of rows std::optional num_rows{}; - // if (!query_tree. prewhere() && !query.where() && !context->getCurrentTransaction()) - if (!main_query_node.hasPrewhere() && !main_query_node.hasWhere()) + /// Transaction check here is necessary because + /// MergeTree maintains total count for all parts in Active state and it simply returns that number for trivial select count() from table query. + /// But if we have current transaction, then we should return number of rows in current snapshot (that may include parts in Outdated state), + /// so we have to use totalRowsByPartitionPredicate() instead of totalRows even for trivial query + /// See https://github.com/ClickHouse/ClickHouse/pull/24258/files#r828182031 + if (!main_query_node.hasPrewhere() && !main_query_node.hasWhere() && !query_context->getCurrentTransaction()) { num_rows = storage->totalRows(settings); } + // TODO: // else // It's possible to optimize count() given only partition predicates // { // SelectQueryInfo temp_query_info; @@ -231,14 +218,12 @@ bool applyTrivialCountIfPossible( if (!num_rows) return false; - LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "Number of rows: {}", num_rows.value()); - /// set aggregation state const AggregateFunctionCount & agg_count = *count_func; std::vector state(agg_count.sizeOfData()); AggregateDataPtr place = state.data(); agg_count.create(place); - // SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); + SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); agg_count.set(place, num_rows.value()); auto column = ColumnAggregateFunction::create(function_node->getAggregateFunction()); From d953baa7e359bf768cabc4d7bab799eec8a56ce0 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 28 Feb 2023 16:12:51 +0000 Subject: [PATCH 102/333] Review improvements + fixup --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 60 +++++++++++------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 78291d29908..24e94f095e3 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -445,6 +445,12 @@ public: alias_name_to_expressions[node_alias].push_back(node); } + if (const auto * function = node->as()) + { + if (AggregateFunctionFactory::instance().isAggregateFunctionName(function->getFunctionName())) + ++aggregate_functions_counter; + } + expressions.emplace_back(node); } @@ -463,6 +469,12 @@ public: alias_name_to_expressions.erase(it); } + if (const auto * function = top_expression->as()) + { + if (AggregateFunctionFactory::instance().isAggregateFunctionName(function->getFunctionName())) + --aggregate_functions_counter; + } + expressions.pop_back(); } @@ -483,17 +495,7 @@ public: bool hasAggregateFunction() const { - const auto & factory = AggregateFunctionFactory::instance(); - for (const auto & node : 
expressions) - { - const auto * function = node->as(); - if (!function) - continue; - - if (factory.isAggregateFunctionName(function->getFunctionName())) - return true; - } - return false; + return aggregate_functions_counter > 0; } QueryTreeNodePtr getExpressionWithAlias(const std::string & alias) const @@ -542,6 +544,7 @@ public: private: QueryTreeNodes expressions; + size_t aggregate_functions_counter = 0; std::unordered_map alias_name_to_expressions; }; @@ -3114,11 +3117,6 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook resolve_result.resolve_place = IdentifierResolvePlace::DATABASE_CATALOG; } - if (resolve_result.resolved_identifier - && scope.nullable_group_by_keys.contains(resolve_result.resolved_identifier) - && !scope.expressions_in_resolve_process_stack.hasAggregateFunction()) - resolve_result.resolved_identifier->convertToNullable(); - it->second = resolve_result; /** If identifier was not resolved, or during expression resolution identifier was explicitly added into non cached set, @@ -3126,8 +3124,7 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook */ if (!resolve_result.resolved_identifier || scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup) || - !scope.use_identifier_lookup_to_result_cache || - scope.group_by_use_nulls) + !scope.use_identifier_lookup_to_result_cache) scope.identifier_lookup_to_result.erase(it); return resolve_result; @@ -4655,8 +4652,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi } function_node.resolveAsFunction(std::move(function_base)); - if (scope.group_by_use_nulls && scope.nullable_group_by_keys.contains(node)) - function_node.convertToNullable(); } catch (Exception & e) { @@ -4905,12 +4900,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id if (result_projection_names.empty()) result_projection_names.push_back(column_node.getColumnName()); - if (scope.group_by_use_nulls && scope.nullable_group_by_keys.contains(node)) - { - node = node->clone(); - node->convertToNullable(); - } - break; } case QueryTreeNodeType::FUNCTION: @@ -4994,6 +4983,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id } } + if (node + && scope.nullable_group_by_keys.contains(node) + && !scope.expressions_in_resolve_process_stack.hasAggregateFunction()) + { + node = node->clone(); + node->convertToNullable(); + } + /** Update aliases after expression node was resolved. * Do not update node in alias table if we resolve it for duplicate alias. 
*/ @@ -6074,10 +6071,13 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); } - for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes()) + if (scope.group_by_use_nulls) { - for (const auto & group_by_elem : grouping_set->as()->getNodes()) - scope.nullable_group_by_keys.insert(group_by_elem->clone()); + for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes()) + { + for (const auto & group_by_elem : grouping_set->as()->getNodes()) + scope.nullable_group_by_keys.insert(group_by_elem); + } } } else @@ -6090,7 +6090,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (scope.group_by_use_nulls) { for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) - scope.nullable_group_by_keys.insert(group_by_elem->clone()); + scope.nullable_group_by_keys.insert(group_by_elem); } } } From f53b65a7cc2568d1c4b6850d1e57a88f94dc51ea Mon Sep 17 00:00:00 2001 From: artem-yadr <84010375+artem-yadr@users.noreply.github.com> Date: Tue, 28 Feb 2023 21:01:03 +0300 Subject: [PATCH 103/333] small fix --- base/poco/MongoDB/include/Poco/MongoDB/Connection.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h index 5c5f6f76638..dcb813b75bc 100644 --- a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h +++ b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h @@ -162,7 +162,7 @@ namespace MongoDB { return _address; } - inline std::string Connection::uri() const + inline const std::string & Connection::uri() const { return _uri; } From 3f892e52abaf1791b6f1dbe27b3ed6ddad20e7f1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 22 Jan 2023 13:20:38 +0100 Subject: [PATCH 104/333] Revert "Revert "Merge pull request #44922 from azat/dist/async-INSERT-metrics"" This is the revert of revert since there will be follow up patches to address the issues. This reverts commit a55798626a491139d84692a3c546f27cdde71665. 
--- src/Storages/Distributed/DirectoryMonitor.cpp | 298 +++++++++--------- src/Storages/Distributed/DirectoryMonitor.h | 27 +- src/Storages/Distributed/DistributedSink.cpp | 20 +- src/Storages/StorageDistributed.cpp | 14 +- src/Storages/StorageDistributed.h | 2 +- ..._INSERT_block_structure_mismatch.reference | 4 +- 6 files changed, 190 insertions(+), 175 deletions(-) diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index cb6659e59ce..f2a3471d839 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -59,6 +59,7 @@ namespace ErrorCodes extern const int TOO_MANY_PARTITIONS; extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int LOGICAL_ERROR; } @@ -363,18 +364,22 @@ StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor( const std::string & relative_path_, ConnectionPoolPtr pool_, ActionBlocker & monitor_blocker_, - BackgroundSchedulePool & bg_pool) + BackgroundSchedulePool & bg_pool, + bool initialize_from_disk) : storage(storage_) , pool(std::move(pool_)) , disk(disk_) , relative_path(relative_path_) , path(fs::path(disk->getPath()) / relative_path / "") + , broken_relative_path(fs::path(relative_path) / "broken") + , broken_path(fs::path(path) / "broken" / "") , should_batch_inserts(storage.getDistributedSettingsRef().monitor_batch_inserts) , split_batch_on_failure(storage.getDistributedSettingsRef().monitor_split_batch_on_failure) , dir_fsync(storage.getDistributedSettingsRef().fsync_directories) , min_batched_block_size_rows(storage.getContext()->getSettingsRef().min_insert_block_size_rows) , min_batched_block_size_bytes(storage.getContext()->getSettingsRef().min_insert_block_size_bytes) , current_batch_file_path(path + "current_batch.txt") + , pending_files(std::numeric_limits::max()) , default_sleep_time(storage.getDistributedSettingsRef().monitor_sleep_time_ms.totalMilliseconds()) , sleep_time(default_sleep_time) , max_sleep_time(storage.getDistributedSettingsRef().monitor_max_sleep_time_ms.totalMilliseconds()) @@ -383,6 +388,11 @@ StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor( , metric_pending_files(CurrentMetrics::DistributedFilesToInsert, 0) , metric_broken_files(CurrentMetrics::BrokenDistributedFilesToInsert, 0) { + fs::create_directory(broken_path); + + if (initialize_from_disk) + initializeFilesFromDisk(); + task_handle = bg_pool.createTask(getLoggerName() + "/Bg", [this]{ run(); }); task_handle->activateAndSchedule(); } @@ -390,35 +400,29 @@ StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor( StorageDistributedDirectoryMonitor::~StorageDistributedDirectoryMonitor() { - if (!quit) + if (!pending_files.isFinished()) { - quit = true; + pending_files.clearAndFinish(); task_handle->deactivate(); } } void StorageDistributedDirectoryMonitor::flushAllData() { - if (quit) + if (pending_files.isFinished()) return; std::lock_guard lock{mutex}; - - const auto & files = getFiles(); - if (!files.empty()) - { - processFiles(files); - - /// Update counters. 
- getFiles(); - } + if (!hasPendingFiles()) + return; + processFiles(); } void StorageDistributedDirectoryMonitor::shutdownAndDropAllData() { - if (!quit) + if (!pending_files.isFinished()) { - quit = true; + pending_files.clearAndFinish(); task_handle->deactivate(); } @@ -432,19 +436,21 @@ void StorageDistributedDirectoryMonitor::run() std::lock_guard lock{mutex}; bool do_sleep = false; - while (!quit) + while (!pending_files.isFinished()) { do_sleep = true; - const auto & files = getFiles(); - if (files.empty()) + if (!hasPendingFiles()) break; if (!monitor_blocker.isCancelled()) { try { - do_sleep = !processFiles(files); + processFiles(); + /// No errors while processing existing files. + /// Let's see maybe there are more files to process. + do_sleep = false; std::lock_guard status_lock(status_mutex); status.last_exception = std::exception_ptr{}; @@ -469,9 +475,7 @@ void StorageDistributedDirectoryMonitor::run() } } else - { LOG_DEBUG(log, "Skipping send data over distributed table."); - } const auto now = std::chrono::system_clock::now(); if (now - last_decrease_time > decrease_error_count_period) @@ -486,10 +490,7 @@ void StorageDistributedDirectoryMonitor::run() break; } - /// Update counters. - getFiles(); - - if (!quit && do_sleep) + if (!pending_files.isFinished() && do_sleep) task_handle->scheduleAfter(sleep_time.count()); } @@ -567,41 +568,83 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri settings.distributed_replica_error_cap); } - -std::map StorageDistributedDirectoryMonitor::getFiles() +bool StorageDistributedDirectoryMonitor::hasPendingFiles() const { - std::map files; + return fs::exists(current_batch_file_path) || !current_batch_file.empty() || !pending_files.empty(); +} + +void StorageDistributedDirectoryMonitor::initializeFilesFromDisk() +{ + /// NOTE: This method does not requires to hold status_mutex, hence, no TSA + /// annotations in the header file. fs::directory_iterator end; - for (fs::directory_iterator it{path}; it != end; ++it) + + /// Initialize pending files { - const auto & file_path_str = it->path(); - if (!it->is_directory() && startsWith(fs::path(file_path_str).extension(), ".bin")) + size_t bytes_count = 0; + + for (fs::directory_iterator it{path}; it != end; ++it) { - files[parse(fs::path(file_path_str).stem())] = file_path_str; + const auto & file_path = it->path(); + const auto & base_name = file_path.stem().string(); + if (!it->is_directory() && startsWith(fs::path(file_path).extension(), ".bin") && parse(base_name)) + { + const std::string & file_path_str = file_path.string(); + if (!pending_files.push(file_path_str)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add pending file"); + bytes_count += fs::file_size(file_path); + } + else if (base_name != "tmp" && base_name != "broken") + { + /// It is OK to log current_batch.txt here too (useful for debugging). 
+ LOG_WARNING(log, "Unexpected file {} in {}", file_path.string(), path); + } } + + LOG_TRACE(log, "Files set to {}", pending_files.size()); + LOG_TRACE(log, "Bytes set to {}", bytes_count); + + metric_pending_files.changeTo(pending_files.size()); + status.files_count = pending_files.size(); + status.bytes_count = bytes_count; } - return files; + /// Initialize broken files + { + size_t broken_bytes_count = 0; + size_t broken_files = 0; + + for (fs::directory_iterator it{broken_path}; it != end; ++it) + { + const auto & file_path = it->path(); + if (!it->is_directory() && startsWith(fs::path(file_path).extension(), ".bin") && parse(file_path.stem())) + broken_bytes_count += fs::file_size(file_path); + else + LOG_WARNING(log, "Unexpected file {} in {}", file_path.string(), broken_path); + } + + LOG_TRACE(log, "Broken files set to {}", broken_files); + LOG_TRACE(log, "Broken bytes set to {}", broken_bytes_count); + + metric_broken_files.changeTo(broken_files); + status.broken_files_count = broken_files; + status.broken_bytes_count = broken_bytes_count; + } } -bool StorageDistributedDirectoryMonitor::processFiles(const std::map & files) +void StorageDistributedDirectoryMonitor::processFiles() { if (should_batch_inserts) - { - processFilesWithBatching(files); - } + processFilesWithBatching(); else { - for (const auto & file : files) - { - if (quit) - return true; + /// Process unprocessed file. + if (!current_batch_file.empty()) + processFile(current_batch_file); - processFile(file.second); - } + while (pending_files.tryPop(current_batch_file)) + processFile(current_batch_file); } - - return true; } void StorageDistributedDirectoryMonitor::processFile(const std::string & file_path) @@ -648,7 +691,11 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa thread_trace_context->root_span.addAttribute(std::current_exception()); e.addMessage(fmt::format("While sending {}", file_path)); - maybeMarkAsBroken(file_path, e); + if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) + { + markAsBroken(file_path); + current_batch_file.clear(); + } throw; } catch (...) @@ -661,6 +708,7 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path); markAsSend(file_path); + current_batch_file.clear(); LOG_TRACE(log, "Finished processing `{}` (took {} ms)", file_path, watch.elapsedMilliseconds()); } @@ -700,25 +748,19 @@ struct StorageDistributedDirectoryMonitor::BatchHeader struct StorageDistributedDirectoryMonitor::Batch { - /// File indexes for this batch. - std::vector file_indices; size_t total_rows = 0; size_t total_bytes = 0; bool recovered = false; StorageDistributedDirectoryMonitor & parent; - /// Information about all available indexes (not only for the current batch). 
- const std::map & file_index_to_path; + std::vector files; bool split_batch_on_failure = true; bool fsync = false; bool dir_fsync = false; - Batch( - StorageDistributedDirectoryMonitor & parent_, - const std::map & file_index_to_path_) + explicit Batch(StorageDistributedDirectoryMonitor & parent_) : parent(parent_) - , file_index_to_path(file_index_to_path_) , split_batch_on_failure(parent.split_batch_on_failure) , fsync(parent.storage.getDistributedSettingsRef().fsync_after_insert) , dir_fsync(parent.dir_fsync) @@ -733,7 +775,7 @@ struct StorageDistributedDirectoryMonitor::Batch void send() { - if (file_indices.empty()) + if (files.empty()) return; CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend}; @@ -776,7 +818,7 @@ struct StorageDistributedDirectoryMonitor::Batch } catch (const Exception & e) { - if (split_batch_on_failure && file_indices.size() > 1 && isSplittableErrorCode(e.code(), e.isRemoteException())) + if (split_batch_on_failure && files.size() > 1 && isSplittableErrorCode(e.code(), e.isRemoteException())) { tryLogCurrentException(parent.log, "Trying to split batch due to"); sendSeparateFiles(); @@ -796,49 +838,28 @@ struct StorageDistributedDirectoryMonitor::Batch } else { - std::vector files; - for (auto file_index_info : file_indices | boost::adaptors::indexed()) - { - if (file_index_info.index() > 8) - { - files.push_back("..."); - break; - } - - auto file_index = file_index_info.value(); - auto file_path = file_index_to_path.find(file_index); - if (file_path != file_index_to_path.end()) - files.push_back(file_path->second); - else - files.push_back(fmt::format("#{}.bin (deleted)", file_index)); - } - e.addMessage(fmt::format("While sending batch, size: {}, files: {}", file_indices.size(), fmt::join(files, "\n"))); - + e.addMessage(fmt::format("While sending a batch of {} files, files: {}", files.size(), fmt::join(files, "\n"))); throw; } } if (!batch_broken) { - LOG_TRACE(parent.log, "Sent a batch of {} files (took {} ms).", file_indices.size(), watch.elapsedMilliseconds()); + LOG_TRACE(parent.log, "Sent a batch of {} files (took {} ms).", files.size(), watch.elapsedMilliseconds()); auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, parent.disk, parent.relative_path); - for (UInt64 file_index : file_indices) - parent.markAsSend(file_index_to_path.at(file_index)); + for (const auto & file : files) + parent.markAsSend(file); } else if (!batch_marked_as_broken) { - LOG_ERROR(parent.log, "Marking a batch of {} files as broken.", file_indices.size()); + LOG_ERROR(parent.log, "Marking a batch of {} files as broken, files: {}", files.size(), fmt::join(files, "\n")); - for (UInt64 file_idx : file_indices) - { - auto file_path = file_index_to_path.find(file_idx); - if (file_path != file_index_to_path.end()) - parent.markAsBroken(file_path->second); - } + for (const auto & file : files) + parent.markAsBroken(file); } - file_indices.clear(); + files.clear(); total_rows = 0; total_bytes = 0; recovered = false; @@ -848,8 +869,11 @@ struct StorageDistributedDirectoryMonitor::Batch void writeText(WriteBuffer & out) { - for (UInt64 file_idx : file_indices) - out << file_idx << '\n'; + for (const auto & file : files) + { + UInt64 file_index = parse(fs::path(file).stem()); + out << file_index << '\n'; + } } void readText(ReadBuffer & in) @@ -858,8 +882,9 @@ struct StorageDistributedDirectoryMonitor::Batch { UInt64 idx; in >> idx >> "\n"; - file_indices.push_back(idx); + files.push_back(fmt::format("{}/{}.bin", parent.path, idx)); } + recovered = true; } @@ 
-871,14 +896,9 @@ private: IConnectionPool::Entry connection; - for (UInt64 file_idx : file_indices) + for (const auto & file : files) { - auto file_path = file_index_to_path.find(file_idx); - if (file_path == file_index_to_path.end()) - throw Exception(ErrorCodes::DISTRIBUTED_BROKEN_BATCH_INFO, - "Failed to send batch: file with index {} is absent", file_idx); - - ReadBufferFromFile in(file_path->second); + ReadBufferFromFile in(file); const auto & distributed_header = readDistributedHeader(in, parent.log); OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__, @@ -892,7 +912,7 @@ private: compression_expected = connection->getCompression() == Protocol::Compression::Enable; LOG_DEBUG(parent.log, "Sending a batch of {} files to {} ({} rows, {} bytes).", - file_indices.size(), + files.size(), connection->getDescription(), formatReadableQuantity(total_rows), formatReadableSizeWithBinarySuffix(total_bytes)); @@ -913,19 +933,11 @@ private: { size_t broken_files = 0; - for (UInt64 file_idx : file_indices) + for (const auto & file : files) { - auto file_path = file_index_to_path.find(file_idx); - if (file_path == file_index_to_path.end()) - { - LOG_ERROR(parent.log, "Failed to send one file from batch: file with index {} is absent", file_idx); - ++broken_files; - continue; - } - try { - ReadBufferFromFile in(file_path->second); + ReadBufferFromFile in(file); const auto & distributed_header = readDistributedHeader(in, parent.log); // this function is called in a separated thread, so we set up the trace context from the file @@ -947,9 +959,11 @@ private: } catch (Exception & e) { - e.addMessage(fmt::format("While sending {}", file_path->second)); - parent.maybeMarkAsBroken(file_path->second, e); - ++broken_files; + if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) + { + parent.markAsBroken(file); + ++broken_files; + } } } @@ -1029,13 +1043,18 @@ std::shared_ptr StorageDistributedDirectoryMonitor::createSourceFromFil return std::make_shared(file_name); } -bool StorageDistributedDirectoryMonitor::addAndSchedule(size_t file_size, size_t ms) +bool StorageDistributedDirectoryMonitor::addAndSchedule(const std::string & file_path, size_t file_size, size_t ms) { - if (quit) + /// NOTE: It is better not to throw in this case, since the file is already + /// on disk (see DistributedSink), and it will be processed next time. + if (pending_files.isFinished()) return false; + if (!pending_files.push(file_path)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add pending file"); + { - std::lock_guard status_lock(status_mutex); + std::lock_guard lock(status_mutex); metric_pending_files.add(); status.bytes_count += file_size; ++status.files_count; @@ -1051,33 +1070,25 @@ StorageDistributedDirectoryMonitor::Status StorageDistributedDirectoryMonitor::g return current_status; } -void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map & files) +void StorageDistributedDirectoryMonitor::processFilesWithBatching() { - std::unordered_set file_indices_to_skip; - + /// Possibly, we failed to send a batch on the previous iteration. Try to send exactly the same batch. if (fs::exists(current_batch_file_path)) { - /// Possibly, we failed to send a batch on the previous iteration. Try to send exactly the same batch. 
- Batch batch(*this, files); + Batch batch(*this); ReadBufferFromFile in{current_batch_file_path}; batch.readText(in); - file_indices_to_skip.insert(batch.file_indices.begin(), batch.file_indices.end()); batch.send(); + + auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path); + fs::remove(current_batch_file_path); } std::unordered_map header_to_batch; - for (const auto & file : files) + std::string file_path; + while (pending_files.tryPop(file_path)) { - if (quit) - return; - - UInt64 file_idx = file.first; - const String & file_path = file.second; - - if (file_indices_to_skip.contains(file_idx)) - continue; - size_t total_rows = 0; size_t total_bytes = 0; Block header; @@ -1116,8 +1127,9 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map } catch (const Exception & e) { - if (maybeMarkAsBroken(file_path, e)) + if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) { + markAsBroken(file_path); tryLogCurrentException(log, "File is marked broken due to"); continue; } @@ -1131,9 +1143,9 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map std::move(distributed_header.client_info), std::move(header) ); - Batch & batch = header_to_batch.try_emplace(batch_header, *this, files).first->second; + Batch & batch = header_to_batch.try_emplace(batch_header, *this).first->second; - batch.file_indices.push_back(file_idx); + batch.files.push_back(file_path); batch.total_rows += total_rows; batch.total_bytes += total_bytes; @@ -1161,16 +1173,10 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map void StorageDistributedDirectoryMonitor::markAsBroken(const std::string & file_path) { - const auto last_path_separator_pos = file_path.rfind('/'); - const auto & base_path = file_path.substr(0, last_path_separator_pos + 1); - const auto & file_name = file_path.substr(last_path_separator_pos + 1); - const String & broken_path = fs::path(base_path) / "broken/"; - const String & broken_file_path = fs::path(broken_path) / file_name; - - fs::create_directory(broken_path); + const String & broken_file_path = fs::path(broken_path) / fs::path(file_path).filename(); auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path); - auto broken_dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, fs::path(relative_path) / "broken/"); + auto broken_dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, broken_relative_path); { std::lock_guard status_lock(status_mutex); @@ -1204,21 +1210,9 @@ void StorageDistributedDirectoryMonitor::markAsSend(const std::string & file_pat fs::remove(file_path); } -bool StorageDistributedDirectoryMonitor::maybeMarkAsBroken(const std::string & file_path, const Exception & e) -{ - /// Mark file as broken if necessary. - if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) - { - markAsBroken(file_path); - return true; - } - else - return false; -} - std::string StorageDistributedDirectoryMonitor::getLoggerName() const { - return storage.getStorageID().getFullTableName() + ".DirectoryMonitor"; + return storage.getStorageID().getFullTableName() + ".DirectoryMonitor." 
+ disk->getName(); } void StorageDistributedDirectoryMonitor::updatePath(const std::string & new_relative_path) diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DirectoryMonitor.h index 030d6acf6e2..313125e16bd 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DirectoryMonitor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -38,7 +39,8 @@ public: const std::string & relative_path_, ConnectionPoolPtr pool_, ActionBlocker & monitor_blocker_, - BackgroundSchedulePool & bg_pool); + BackgroundSchedulePool & bg_pool, + bool initialize_from_disk); ~StorageDistributedDirectoryMonitor(); @@ -53,7 +55,7 @@ public: static std::shared_ptr createSourceFromFile(const String & file_name); /// For scheduling via DistributedSink. - bool addAndSchedule(size_t file_size, size_t ms); + bool addAndSchedule(const std::string & file_path, size_t file_size, size_t ms); struct InternalStatus { @@ -79,14 +81,15 @@ public: private: void run(); - std::map getFiles(); - bool processFiles(const std::map & files); + bool hasPendingFiles() const; + + void initializeFilesFromDisk(); + void processFiles(); void processFile(const std::string & file_path); - void processFilesWithBatching(const std::map & files); + void processFilesWithBatching(); void markAsBroken(const std::string & file_path); void markAsSend(const std::string & file_path); - bool maybeMarkAsBroken(const std::string & file_path, const Exception & e); std::string getLoggerName() const; @@ -96,25 +99,33 @@ private: DiskPtr disk; std::string relative_path; std::string path; + std::string broken_relative_path; + std::string broken_path; const bool should_batch_inserts = false; const bool split_batch_on_failure = true; const bool dir_fsync = false; const size_t min_batched_block_size_rows = 0; const size_t min_batched_block_size_bytes = 0; - String current_batch_file_path; + + /// This is pending data (due to some error) for should_batch_inserts==true + std::string current_batch_file_path; + /// This is pending data (due to some error) for should_batch_inserts==false + std::string current_batch_file; struct BatchHeader; struct Batch; std::mutex status_mutex; + InternalStatus status; + ConcurrentBoundedQueue pending_files; + const std::chrono::milliseconds default_sleep_time; std::chrono::milliseconds sleep_time; const std::chrono::milliseconds max_sleep_time; std::chrono::time_point last_decrease_time {std::chrono::system_clock::now()}; - std::atomic quit {false}; std::mutex mutex; Poco::Logger * log; ActionBlocker & monitor_blocker; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index bac13ea37cf..edbb2acc923 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -724,6 +724,9 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const return guard; }; + std::vector bin_files; + bin_files.reserve(dir_names.size()); + auto it = dir_names.begin(); /// on first iteration write block to a temporary directory for subsequent /// hardlinking to ensure the inode is not freed until we're done @@ -802,8 +805,8 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const } // Create hardlink here to reuse increment number - const std::string block_file_path(fs::path(path) / file_name); - createHardLink(first_file_tmp_path, block_file_path); + bin_files.push_back(fs::path(path) / file_name); + 
createHardLink(first_file_tmp_path, bin_files.back()); auto dir_sync_guard = make_directory_sync_guard(*it); } ++it; @@ -814,8 +817,8 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const const std::string path(fs::path(disk_path) / (data_path + *it)); fs::create_directory(path); - const std::string block_file_path(fs::path(path) / (toString(storage.file_names_increment.get()) + ".bin")); - createHardLink(first_file_tmp_path, block_file_path); + bin_files.push_back(fs::path(path) / (toString(storage.file_names_increment.get()) + ".bin")); + createHardLink(first_file_tmp_path, bin_files.back()); auto dir_sync_guard = make_directory_sync_guard(*it); } @@ -826,10 +829,13 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const /// Notify auto sleep_ms = context->getSettingsRef().distributed_directory_monitor_sleep_time_ms; - for (const auto & dir_name : dir_names) + for (size_t i = 0; i < dir_names.size(); ++i) { - auto & directory_monitor = storage.requireDirectoryMonitor(disk, dir_name); - directory_monitor.addAndSchedule(file_size, sleep_ms.totalMilliseconds()); + const auto & dir_name = dir_names[i]; + const auto & bin_file = bin_files[i]; + + auto & directory_monitor = storage.requireDirectoryMonitor(disk, dir_name, /* startup= */ false); + directory_monitor.addAndSchedule(bin_file, file_size, sleep_ms.totalMilliseconds()); } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 5516d6cadf0..a38beef983c 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1208,12 +1208,15 @@ void StorageDistributed::createDirectoryMonitors(const DiskPtr & disk) const auto & dir_path = it->path(); if (std::filesystem::is_directory(dir_path)) { + /// Created by DistributedSink const auto & tmp_path = dir_path / "tmp"; - - /// "tmp" created by DistributedSink if (std::filesystem::is_directory(tmp_path) && std::filesystem::is_empty(tmp_path)) std::filesystem::remove(tmp_path); + const auto & broken_path = dir_path / "broken"; + if (std::filesystem::is_directory(broken_path) && std::filesystem::is_empty(broken_path)) + std::filesystem::remove(broken_path); + if (std::filesystem::is_empty(dir_path)) { LOG_DEBUG(log, "Removing {} (used for async INSERT into Distributed)", dir_path.string()); @@ -1222,14 +1225,14 @@ void StorageDistributed::createDirectoryMonitors(const DiskPtr & disk) } else { - requireDirectoryMonitor(disk, dir_path.filename().string()); + requireDirectoryMonitor(disk, dir_path.filename().string(), /* startup= */ true); } } } } -StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor(const DiskPtr & disk, const std::string & name) +StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor(const DiskPtr & disk, const std::string & name, bool startup) { const std::string & disk_path = disk->getPath(); const std::string key(disk_path + name); @@ -1243,7 +1246,8 @@ StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor( *this, disk, relative_data_path + name, node_data.connection_pool, monitors_blocker, - getContext()->getDistributedSchedulePool()); + getContext()->getDistributedSchedulePool(), + /* initialize_from_disk= */ startup); } return *node_data.directory_monitor; } diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 66fd7b77889..25a752fe795 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -166,7 +166,7 @@ 
private: /// create directory monitors for each existing subdirectory void createDirectoryMonitors(const DiskPtr & disk); /// ensure directory monitor thread and connection pool creation by disk and subdirectory name - StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const DiskPtr & disk, const std::string & name); + StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const DiskPtr & disk, const std::string & name, bool startup); /// Return list of metrics for all created monitors /// (note that monitors are created lazily, i.e. until at least one INSERT executed) diff --git a/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference index f3be69d3279..b0d8284faa5 100644 --- a/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference +++ b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference @@ -1,7 +1,7 @@ DistributedSink: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done. DistributedSink: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done. - default.dist_01683.DirectoryMonitor: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 0)), implicit conversion will be done - default.dist_01683.DirectoryMonitor: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 0)), implicit conversion will be done + default.dist_01683.DirectoryMonitor.default: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 0)), implicit conversion will be done + default.dist_01683.DirectoryMonitor.default: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 0)), implicit conversion will be done 1 1 2 From 16646c0923338adfd20e2853de4a1b9373534d58 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 10:07:44 +0100 Subject: [PATCH 105/333] Rename DirectoryMonitor::current_batch_file to current_file Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DirectoryMonitor.cpp | 14 +++++++------- src/Storages/Distributed/DirectoryMonitor.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index f2a3471d839..e536f1e63d8 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -570,7 +570,7 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri bool StorageDistributedDirectoryMonitor::hasPendingFiles() const { - return fs::exists(current_batch_file_path) || !current_batch_file.empty() || !pending_files.empty(); + return fs::exists(current_batch_file_path) || !current_file.empty() || !pending_files.empty(); } void StorageDistributedDirectoryMonitor::initializeFilesFromDisk() @@ -639,11 +639,11 @@ void StorageDistributedDirectoryMonitor::processFiles() else { /// Process unprocessed file.
- if (!current_batch_file.empty()) - processFile(current_batch_file); + if (!current_file.empty()) + processFile(current_file); - while (pending_files.tryPop(current_batch_file)) - processFile(current_batch_file); + while (pending_files.tryPop(current_file)) + processFile(current_file); } } @@ -694,7 +694,7 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) { markAsBroken(file_path); - current_batch_file.clear(); + current_file.clear(); } throw; } @@ -708,7 +708,7 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path); markAsSend(file_path); - current_batch_file.clear(); + current_file.clear(); LOG_TRACE(log, "Finished processing `{}` (took {} ms)", file_path, watch.elapsedMilliseconds()); } diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DirectoryMonitor.h index 313125e16bd..06843b77a4b 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DirectoryMonitor.h @@ -111,7 +111,7 @@ private: /// This is pending data (due to some error) for should_batch_inserts==true std::string current_batch_file_path; /// This is pending data (due to some error) for should_batch_inserts==false - std::string current_batch_file; + std::string current_file; struct BatchHeader; struct Batch; From ef1e642e059510ff182a6a9dc97d34fdb7c1a071 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 10:12:50 +0100 Subject: [PATCH 106/333] Add log message to StorageDistributedDirectoryMonitor::addAndSchedule() Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DirectoryMonitor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index e536f1e63d8..c04e54b6bba 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -1048,7 +1048,10 @@ bool StorageDistributedDirectoryMonitor::addAndSchedule(const std::string & file /// NOTE: It is better not to throw in this case, since the file is already /// on disk (see DistributedSink), and it will be processed next time. if (pending_files.isFinished()) + { + LOG_DEBUG(log, "File {} had not been scheduled, since the table had been detached", file_path); return false; + } if (!pending_files.push(file_path)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add pending file"); From f0a2efa63014ff1349340e28647ab067698cf974 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 10:20:28 +0100 Subject: [PATCH 107/333] Always manipulate with absolute file paths in DirectoryMonitor Otherwise on batch restore we can get the difference in file paths. 
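As a rough illustration of the idea (the helper name below is illustrative and not part of this change): every path is normalized through std::filesystem::absolute before it is queued or recovered from current_batch.txt, so both code paths produce identical strings for the same file.

#include <filesystem>
#include <string>

// Illustrative sketch only: normalize a queued path the same way regardless of
// whether it comes from DistributedSink or from a restored batch.
static std::string normalizedQueuedPath(const std::string & file_path)
{
    // std::filesystem::absolute resolves the path against the current working
    // directory; it does not require the file to exist.
    return std::filesystem::absolute(file_path).string();
}

With that normalization, entries restored from current_batch.txt compare equal to freshly scheduled ones.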
Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DirectoryMonitor.cpp | 16 ++++++++++------ src/Storages/Distributed/DirectoryMonitor.h | 3 ++- src/Storages/Distributed/DistributedSink.cpp | 2 +- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index c04e54b6bba..eead8c8ea42 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -573,6 +573,12 @@ bool StorageDistributedDirectoryMonitor::hasPendingFiles() const return fs::exists(current_batch_file_path) || !current_file.empty() || !pending_files.empty(); } +void StorageDistributedDirectoryMonitor::addFile(const std::string & file_path) +{ + if (!pending_files.push(fs::absolute(file_path).string())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot schedule a file '{}'", file_path); +} + void StorageDistributedDirectoryMonitor::initializeFilesFromDisk() { /// NOTE: This method does not requires to hold status_mutex, hence, no TSA @@ -591,8 +597,7 @@ void StorageDistributedDirectoryMonitor::initializeFilesFromDisk() if (!it->is_directory() && startsWith(fs::path(file_path).extension(), ".bin") && parse(base_name)) { const std::string & file_path_str = file_path.string(); - if (!pending_files.push(file_path_str)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add pending file"); + addFile(file_path_str); bytes_count += fs::file_size(file_path); } else if (base_name != "tmp" && base_name != "broken") @@ -882,7 +887,7 @@ struct StorageDistributedDirectoryMonitor::Batch { UInt64 idx; in >> idx >> "\n"; - files.push_back(fmt::format("{}/{}.bin", parent.path, idx)); + files.push_back(fs::absolute(fmt::format("{}/{}.bin", parent.path, idx)).string()); } recovered = true; @@ -1043,7 +1048,7 @@ std::shared_ptr StorageDistributedDirectoryMonitor::createSourceFromFil return std::make_shared(file_name); } -bool StorageDistributedDirectoryMonitor::addAndSchedule(const std::string & file_path, size_t file_size, size_t ms) +bool StorageDistributedDirectoryMonitor::addFileAndSchedule(const std::string & file_path, size_t file_size, size_t ms) { /// NOTE: It is better not to throw in this case, since the file is already /// on disk (see DistributedSink), and it will be processed next time. @@ -1053,8 +1058,7 @@ bool StorageDistributedDirectoryMonitor::addAndSchedule(const std::string & file return false; } - if (!pending_files.push(file_path)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add pending file"); + addFile(file_path); { std::lock_guard lock(status_mutex); diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DirectoryMonitor.h index 06843b77a4b..9b1596d45e3 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DirectoryMonitor.h @@ -55,7 +55,7 @@ public: static std::shared_ptr createSourceFromFile(const String & file_name); /// For scheduling via DistributedSink. 
- bool addAndSchedule(const std::string & file_path, size_t file_size, size_t ms); + bool addFileAndSchedule(const std::string & file_path, size_t file_size, size_t ms); struct InternalStatus { @@ -83,6 +83,7 @@ private: bool hasPendingFiles() const; + void addFile(const std::string & file_path); void initializeFilesFromDisk(); void processFiles(); void processFile(const std::string & file_path); diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index edbb2acc923..c9c235596db 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -835,7 +835,7 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const const auto & bin_file = bin_files[i]; auto & directory_monitor = storage.requireDirectoryMonitor(disk, dir_name, /* startup= */ false); - directory_monitor.addAndSchedule(bin_file, file_size, sleep_ms.totalMilliseconds()); + directory_monitor.addFileAndSchedule(bin_file, file_size, sleep_ms.totalMilliseconds()); } } From 13a3e03f19b13f0210e93e28f433e3abee7b3fd9 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 10:27:59 +0100 Subject: [PATCH 108/333] Introduce StorageDistributedDirectoryMonitor::Batch::{de,}serialize() Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DirectoryMonitor.cpp | 51 +++++++++++-------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index eead8c8ea42..fe5018db821 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -793,24 +793,7 @@ struct StorageDistributedDirectoryMonitor::Batch /// we must try to re-send exactly the same batches. /// So we save contents of the current batch into the current_batch_file_path file /// and truncate it afterwards if all went well. - - /// Temporary file is required for atomicity. - String tmp_file{parent.current_batch_file_path + ".tmp"}; - - auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, parent.disk, parent.relative_path); - if (fs::exists(tmp_file)) - LOG_ERROR(parent.log, "Temporary file {} exists. Unclean shutdown?", backQuote(tmp_file)); - - { - WriteBufferFromFile out{tmp_file, O_WRONLY | O_TRUNC | O_CREAT}; - writeText(out); - - out.finalize(); - if (fsync) - out.sync(); - } - - fs::rename(tmp_file, parent.current_batch_file_path); + serialize(); } bool batch_broken = false; @@ -872,6 +855,34 @@ struct StorageDistributedDirectoryMonitor::Batch fs::resize_file(parent.current_batch_file_path, 0); } + void serialize() + { + /// Temporary file is required for atomicity. + String tmp_file{parent.current_batch_file_path + ".tmp"}; + + auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, parent.disk, parent.relative_path); + if (fs::exists(tmp_file)) + LOG_ERROR(parent.log, "Temporary file {} exists. 
Unclean shutdown?", backQuote(tmp_file)); + + { + WriteBufferFromFile out{tmp_file, O_WRONLY | O_TRUNC | O_CREAT}; + writeText(out); + + out.finalize(); + if (fsync) + out.sync(); + } + + fs::rename(tmp_file, parent.current_batch_file_path); + } + + void deserialize() + { + ReadBufferFromFile in{parent.current_batch_file_path}; + readText(in); + } + +private: void writeText(WriteBuffer & out) { for (const auto & file : files) @@ -893,7 +904,6 @@ struct StorageDistributedDirectoryMonitor::Batch recovered = true; } -private: void sendBatch() { std::unique_ptr remote; @@ -1083,8 +1093,7 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching() if (fs::exists(current_batch_file_path)) { Batch batch(*this); - ReadBufferFromFile in{current_batch_file_path}; - batch.readText(in); + batch.deserialize(); batch.send(); auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path); From 0c19a75a1c3225d8ec6625e2ca37434245f63a4c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 10:29:59 +0100 Subject: [PATCH 109/333] Add log message for batch restore Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DirectoryMonitor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index fe5018db821..c86661eb2ec 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -1092,6 +1092,8 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching() /// Possibly, we failed to send a batch on the previous iteration. Try to send exactly the same batch. if (fs::exists(current_batch_file_path)) { + LOG_DEBUG(log, "Restoring the batch"); + Batch batch(*this); batch.deserialize(); batch.send(); From 22a39e29f760969607b728a9de671c18071cbfb4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 10:41:38 +0100 Subject: [PATCH 110/333] Add a comment for StorageDistributedDirectoryMonitor::Batch::recovered Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DirectoryMonitor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index c86661eb2ec..06aa5219be0 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -755,6 +755,7 @@ struct StorageDistributedDirectoryMonitor::Batch { size_t total_rows = 0; size_t total_bytes = 0; + /// Was the batch created from the files in current_batch.txt?
bool recovered = false; StorageDistributedDirectoryMonitor & parent; From 325a7b23050f3964704f87fabfacc92b0b737e9a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 16:53:51 +0100 Subject: [PATCH 111/333] Separate out DistributedHeader as DistributedAsyncInsertHeader Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DirectoryMonitor.cpp | 17 ++- .../DistributedAsyncInsertHeader.cpp | 109 ++++++++++++++++++ .../DistributedAsyncInsertHeader.h | 38 ++++++ 3 files changed, 155 insertions(+), 9 deletions(-) create mode 100644 src/Storages/Distributed/DistributedAsyncInsertHeader.cpp create mode 100644 src/Storages/Distributed/DistributedAsyncInsertHeader.h diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 06aa5219be0..469f2f90954 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -12,10 +12,9 @@ #include #include #include -#include +#include #include #include -#include #include #include #include @@ -277,7 +276,7 @@ namespace return nullptr; } - void writeAndConvert(RemoteInserter & remote, const DistributedHeader & distributed_header, ReadBufferFromFile & in) + void writeAndConvert(RemoteInserter & remote, const DistributedAsyncInsertHeader & distributed_header, ReadBufferFromFile & in) { CompressedReadBuffer decompressing_in(in); NativeReader block_in(decompressing_in, distributed_header.revision); @@ -296,7 +295,7 @@ namespace } void writeRemoteConvert( - const DistributedHeader & distributed_header, + const DistributedAsyncInsertHeader & distributed_header, RemoteInserter & remote, bool compression_expected, ReadBufferFromFile & in, @@ -662,7 +661,7 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend}; ReadBufferFromFile in(file_path); - const auto & distributed_header = readDistributedHeader(in, log); + const auto & distributed_header = readDistributedAsyncInsertHeader(in, log); thread_trace_context = std::make_unique(__PRETTY_FUNCTION__, distributed_header.client_info.client_trace_context, @@ -915,7 +914,7 @@ private: for (const auto & file : files) { ReadBufferFromFile in(file); - const auto & distributed_header = readDistributedHeader(in, parent.log); + const auto & distributed_header = readDistributedAsyncInsertHeader(in, parent.log); OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__, distributed_header.client_info.client_trace_context, @@ -954,7 +953,7 @@ private: try { ReadBufferFromFile in(file); - const auto & distributed_header = readDistributedHeader(in, parent.log); + const auto & distributed_header = readDistributedAsyncInsertHeader(in, parent.log); // this function is called in a separated thread, so we set up the trace context from the file OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__, @@ -1111,12 +1110,12 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching() size_t total_rows = 0; size_t total_bytes = 0; Block header; - DistributedHeader distributed_header; + DistributedAsyncInsertHeader distributed_header; try { /// Determine metadata of the current file and check if it is not broken. 
ReadBufferFromFile in{file_path}; - distributed_header = readDistributedHeader(in, log); + distributed_header = readDistributedAsyncInsertHeader(in, log); if (distributed_header.rows) { diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp new file mode 100644 index 00000000000..19235c91cc6 --- /dev/null +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_READ_ALL_DATA; + extern const int CHECKSUM_DOESNT_MATCH; +} + +DistributedAsyncInsertHeader readDistributedAsyncInsertHeader(ReadBufferFromFile & in, Poco::Logger * log) +{ + DistributedAsyncInsertHeader distributed_header; + + UInt64 query_size; + readVarUInt(query_size, in); + + if (query_size == DBMS_DISTRIBUTED_SIGNATURE_HEADER) + { + /// Read the header as a string. + String header_data; + readStringBinary(header_data, in); + + /// Check the checksum of the header. + CityHash_v1_0_2::uint128 expected_checksum; + readPODBinary(expected_checksum, in); + CityHash_v1_0_2::uint128 calculated_checksum = + CityHash_v1_0_2::CityHash128(header_data.data(), header_data.size()); + if (expected_checksum != calculated_checksum) + { + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, + "Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.", + getHexUIntLowercase(expected_checksum.first), getHexUIntLowercase(expected_checksum.second), + getHexUIntLowercase(calculated_checksum.first), getHexUIntLowercase(calculated_checksum.second)); + } + + /// Read the parts of the header. + ReadBufferFromString header_buf(header_data); + + readVarUInt(distributed_header.revision, header_buf); + if (DBMS_TCP_PROTOCOL_VERSION < distributed_header.revision) + { + LOG_WARNING(log, "ClickHouse shard version is older than ClickHouse initiator version. It may lack support for new features."); + } + + readStringBinary(distributed_header.insert_query, header_buf); + distributed_header.insert_settings.read(header_buf); + + if (header_buf.hasPendingData()) + distributed_header.client_info.read(header_buf, distributed_header.revision); + + if (header_buf.hasPendingData()) + { + readVarUInt(distributed_header.rows, header_buf); + readVarUInt(distributed_header.bytes, header_buf); + readStringBinary(distributed_header.block_header_string, header_buf); + } + + if (header_buf.hasPendingData()) + { + NativeReader header_block_in(header_buf, distributed_header.revision); + distributed_header.block_header = header_block_in.read(); + if (!distributed_header.block_header) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, + "Cannot read header from the {} batch. Data was written with protocol version {}, current version: {}", + in.getFileName(), distributed_header.revision, DBMS_TCP_PROTOCOL_VERSION); + } + + if (header_buf.hasPendingData()) + { + readVarUInt(distributed_header.shard_num, header_buf); + readStringBinary(distributed_header.cluster, header_buf); + readStringBinary(distributed_header.distributed_table, header_buf); + readStringBinary(distributed_header.remote_table, header_buf); + } + + /// Add handling new data here, for example: + /// + /// if (header_buf.hasPendingData()) + /// readVarUInt(my_new_data, header_buf); + /// + /// And note that it is safe, because we have checksum and size for header. 
+ + return distributed_header; + } + + if (query_size == DBMS_DISTRIBUTED_SIGNATURE_HEADER_OLD_FORMAT) + { + distributed_header.insert_settings.read(in, SettingsWriteFormat::BINARY); + readStringBinary(distributed_header.insert_query, in); + return distributed_header; + } + + distributed_header.insert_query.resize(query_size); + in.readStrict(distributed_header.insert_query.data(), query_size); + + return distributed_header; +} + +} diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.h b/src/Storages/Distributed/DistributedAsyncInsertHeader.h new file mode 100644 index 00000000000..2c7a6477b6c --- /dev/null +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class ReadBufferFromFile; + +/// Header for the binary files that are stored on disk for async INSERT into Distributed. +struct DistributedAsyncInsertHeader +{ + UInt64 revision = 0; + Settings insert_settings; + std::string insert_query; + ClientInfo client_info; + + /// .bin file cannot have zero rows/bytes. + size_t rows = 0; + size_t bytes = 0; + + UInt32 shard_num = 0; + std::string cluster; + std::string distributed_table; + std::string remote_table; + + /// dumpStructure() of the header -- obsolete + std::string block_header_string; + Block block_header; +}; + +DistributedAsyncInsertHeader readDistributedAsyncInsertHeader(ReadBufferFromFile & in, Poco::Logger * log); + +} From 33b13549ad6f39fabad4474ae6055458ce8a041f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 17:01:41 +0100 Subject: [PATCH 112/333] Separate out DirectoryMonitorSource as DistributedAsyncInsertSource Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DirectoryMonitor.cpp | 70 ------------------- .../DistributedAsyncInsertSource.cpp | 62 ++++++++++++++++ .../DistributedAsyncInsertSource.h | 28 ++++++++ src/Storages/StorageFile.cpp | 7 +- 4 files changed, 93 insertions(+), 74 deletions(-) create mode 100644 src/Storages/Distributed/DistributedAsyncInsertSource.cpp create mode 100644 src/Storages/Distributed/DistributedAsyncInsertSource.h diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 469f2f90954..c51e9c6e061 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -988,76 +988,6 @@ private: } }; -class DirectoryMonitorSource : public ISource -{ -public: - - struct Data - { - std::unique_ptr in; - std::unique_ptr decompressing_in; - std::unique_ptr block_in; - - Poco::Logger * log = nullptr; - - Block first_block; - - explicit Data(const String & file_name) - { - in = std::make_unique(file_name); - decompressing_in = std::make_unique(*in); - log = &Poco::Logger::get("DirectoryMonitorSource"); - - auto distributed_header = readDistributedHeader(*in, log); - block_in = std::make_unique(*decompressing_in, distributed_header.revision); - - first_block = block_in->read(); - } - - Data(Data &&) = default; - }; - - explicit DirectoryMonitorSource(const String & file_name) - : DirectoryMonitorSource(Data(file_name)) - { - } - - explicit DirectoryMonitorSource(Data data_) - : ISource(data_.first_block.cloneEmpty()) - , data(std::move(data_)) - { - } - - String getName() const override { return "DirectoryMonitorSource"; } - -protected: - Chunk generate() override - { - if (data.first_block) - { - size_t num_rows = data.first_block.rows(); - Chunk res(data.first_block.getColumns(), num_rows); - 
data.first_block.clear(); - return res; - } - - auto block = data.block_in->read(); - if (!block) - return {}; - - size_t num_rows = block.rows(); - return Chunk(block.getColumns(), num_rows); - } - -private: - Data data; -}; - -std::shared_ptr StorageDistributedDirectoryMonitor::createSourceFromFile(const String & file_name) -{ - return std::make_shared(file_name); -} - bool StorageDistributedDirectoryMonitor::addFileAndSchedule(const std::string & file_path, size_t file_size, size_t ms) { /// NOTE: It is better not to throw in this case, since the file is already diff --git a/src/Storages/Distributed/DistributedAsyncInsertSource.cpp b/src/Storages/Distributed/DistributedAsyncInsertSource.cpp new file mode 100644 index 00000000000..782cbf9c026 --- /dev/null +++ b/src/Storages/Distributed/DistributedAsyncInsertSource.cpp @@ -0,0 +1,62 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +struct DistributedAsyncInsertSource::Data +{ + Poco::Logger * log = nullptr; + + ReadBufferFromFile in; + CompressedReadBuffer decompressing_in; + NativeReader block_in; + + Block first_block; + + explicit Data(const String & file_name) + : log(&Poco::Logger::get("DistributedAsyncInsertSource")) + , in(file_name) + , decompressing_in(in) + , block_in(decompressing_in, readDistributedAsyncInsertHeader(in, log).revision) + , first_block(block_in.read()) + { + } +}; + +DistributedAsyncInsertSource::DistributedAsyncInsertSource(const String & file_name) + : DistributedAsyncInsertSource(std::make_unique(file_name)) +{ +} + +DistributedAsyncInsertSource::DistributedAsyncInsertSource(std::unique_ptr data_) + : ISource(data_->first_block.cloneEmpty()) + , data(std::move(data_)) +{ +} + +DistributedAsyncInsertSource::~DistributedAsyncInsertSource() = default; + +Chunk DistributedAsyncInsertSource::generate() +{ + if (data->first_block) + { + size_t num_rows = data->first_block.rows(); + Chunk res(data->first_block.getColumns(), num_rows); + data->first_block.clear(); + return res; + } + + auto block = data->block_in.read(); + if (!block) + return {}; + + size_t num_rows = block.rows(); + return Chunk(block.getColumns(), num_rows); +} + +} diff --git a/src/Storages/Distributed/DistributedAsyncInsertSource.h b/src/Storages/Distributed/DistributedAsyncInsertSource.h new file mode 100644 index 00000000000..35f846151dc --- /dev/null +++ b/src/Storages/Distributed/DistributedAsyncInsertSource.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/// Source for the Distributed engine on-disk file for async INSERT. 
+class DistributedAsyncInsertSource : public ISource +{ + struct Data; + explicit DistributedAsyncInsertSource(std::unique_ptr data); + +public: + explicit DistributedAsyncInsertSource(const String & file_name); + ~DistributedAsyncInsertSource() override; + String getName() const override { return "DistributedAsyncInsertSource"; } + +protected: + Chunk generate() override; + +private: + std::unique_ptr data; +}; + +} diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index e2a2f84bc72..5fd5664b9e6 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -368,8 +368,7 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (paths.empty()) throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Cannot get table structure from file, because no files match specified name"); - auto source = StorageDistributedDirectoryMonitor::createSourceFromFile(paths[0]); - return ColumnsDescription(source->getOutputs().front().getHeader().getNamesAndTypesList()); + return ColumnsDescription(DistributedAsyncInsertSource(paths[0]).getOutputs().front().getHeader().getNamesAndTypesList()); } if (paths.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) @@ -597,7 +596,7 @@ public: /// Special case for distributed format. Defaults are not needed here. if (storage->format_name == "Distributed") { - pipeline = std::make_unique(StorageDistributedDirectoryMonitor::createSourceFromFile(current_path)); + pipeline = std::make_unique(std::make_shared(current_path)); reader = std::make_unique(*pipeline); continue; } From 1c4659b8e705b9a4c46929524bd392eaa6609f1d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 18:12:42 +0100 Subject: [PATCH 113/333] Separate out Batch as DistributedAsyncInsertBatch (and also some helpers) Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DirectoryMonitor.cpp | 598 +++--------------- src/Storages/Distributed/DirectoryMonitor.h | 8 +- .../DistributedAsyncInsertBatch.cpp | 277 ++++++++ .../Distributed/DistributedAsyncInsertBatch.h | 44 ++ .../DistributedAsyncInsertHelpers.cpp | 124 ++++ .../DistributedAsyncInsertHelpers.h | 35 + src/Storages/StorageDistributed.h | 1 + 7 files changed, 559 insertions(+), 528 deletions(-) create mode 100644 src/Storages/Distributed/DistributedAsyncInsertBatch.cpp create mode 100644 src/Storages/Distributed/DistributedAsyncInsertBatch.h create mode 100644 src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp create mode 100644 src/Storages/Distributed/DistributedAsyncInsertHelpers.h diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index c51e9c6e061..d5091003cb3 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -1,6 +1,19 @@ +#include +#include +#include +#include +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -21,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -41,13 +55,6 @@ namespace DB namespace ErrorCodes { - extern const int CANNOT_READ_ALL_DATA; - extern const int UNKNOWN_CODEC; - extern const int CANNOT_DECOMPRESS; - extern const int CHECKSUM_DOESNT_MATCH; - extern const int TOO_LARGE_SIZE_COMPRESSED; - extern const int ATTEMPT_TO_READ_AFTER_EOF; - extern const int EMPTY_DATA_PASSED; extern const int INCORRECT_FILE_NAME; 
extern const int MEMORY_LIMIT_EXCEEDED; extern const int DISTRIBUTED_BROKEN_BATCH_INFO; @@ -64,296 +71,64 @@ namespace ErrorCodes namespace { - constexpr const std::chrono::minutes decrease_error_count_period{5}; - template - ConnectionPoolPtrs createPoolsForAddresses(const std::string & name, PoolFactory && factory, const Cluster::ShardsInfo & shards_info, Poco::Logger * log) +template +ConnectionPoolPtrs createPoolsForAddresses(const std::string & name, PoolFactory && factory, const Cluster::ShardsInfo & shards_info, Poco::Logger * log) +{ + ConnectionPoolPtrs pools; + + auto make_connection = [&](const Cluster::Address & address) { - ConnectionPoolPtrs pools; - - auto make_connection = [&](const Cluster::Address & address) + try { - try - { - pools.emplace_back(factory(address)); - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::INCORRECT_FILE_NAME) - { - tryLogCurrentException(log); - return; - } - throw; - } - }; - - for (auto it = boost::make_split_iterator(name, boost::first_finder(",")); it != decltype(it){}; ++it) - { - const std::string & dirname = boost::copy_range(*it); - Cluster::Address address = Cluster::Address::fromFullString(dirname); - if (address.shard_index && dirname.ends_with("_all_replicas")) - { - if (address.shard_index > shards_info.size()) - { - LOG_ERROR(log, "No shard with shard_index={} ({})", address.shard_index, name); - continue; - } - - const auto & shard_info = shards_info[address.shard_index - 1]; - size_t replicas = shard_info.per_replica_pools.size(); - - for (size_t replica_index = 1; replica_index <= replicas; ++replica_index) - { - address.replica_index = static_cast(replica_index); - make_connection(address); - } - } - else - make_connection(address); + pools.emplace_back(factory(address)); } - - return pools; - } - - void assertChecksum(CityHash_v1_0_2::uint128 expected, CityHash_v1_0_2::uint128 calculated) - { - if (expected != calculated) + catch (const Exception & e) { - throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, - "Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.", - getHexUIntLowercase(expected.first), getHexUIntLowercase(expected.second), - getHexUIntLowercase(calculated.first), getHexUIntLowercase(calculated.second)); + if (e.code() == ErrorCodes::INCORRECT_FILE_NAME) + { + tryLogCurrentException(log); + return; + } + throw; } - } - - struct DistributedHeader - { - UInt64 revision = 0; - Settings insert_settings; - std::string insert_query; - ClientInfo client_info; - - /// .bin file cannot have zero rows/bytes. - size_t rows = 0; - size_t bytes = 0; - - UInt32 shard_num = 0; - std::string cluster; - std::string distributed_table; - std::string remote_table; - - /// dumpStructure() of the header -- obsolete - std::string block_header_string; - Block block_header; }; - DistributedHeader readDistributedHeader(ReadBufferFromFile & in, Poco::Logger * log) + for (auto it = boost::make_split_iterator(name, boost::first_finder(",")); it != decltype(it){}; ++it) { - DistributedHeader distributed_header; - - UInt64 query_size; - readVarUInt(query_size, in); - - if (query_size == DBMS_DISTRIBUTED_SIGNATURE_HEADER) + const std::string & dirname = boost::copy_range(*it); + Cluster::Address address = Cluster::Address::fromFullString(dirname); + if (address.shard_index && dirname.ends_with("_all_replicas")) { - /// Read the header as a string. - String header_data; - readStringBinary(header_data, in); - - /// Check the checksum of the header. 
- CityHash_v1_0_2::uint128 checksum; - readPODBinary(checksum, in); - assertChecksum(checksum, CityHash_v1_0_2::CityHash128(header_data.data(), header_data.size())); - - /// Read the parts of the header. - ReadBufferFromString header_buf(header_data); - - readVarUInt(distributed_header.revision, header_buf); - if (DBMS_TCP_PROTOCOL_VERSION < distributed_header.revision) + if (address.shard_index > shards_info.size()) { - LOG_WARNING(log, "ClickHouse shard version is older than ClickHouse initiator version. It may lack support for new features."); + LOG_ERROR(log, "No shard with shard_index={} ({})", address.shard_index, name); + continue; } - readStringBinary(distributed_header.insert_query, header_buf); - distributed_header.insert_settings.read(header_buf); + const auto & shard_info = shards_info[address.shard_index - 1]; + size_t replicas = shard_info.per_replica_pools.size(); - if (header_buf.hasPendingData()) - distributed_header.client_info.read(header_buf, distributed_header.revision); - - if (header_buf.hasPendingData()) + for (size_t replica_index = 1; replica_index <= replicas; ++replica_index) { - readVarUInt(distributed_header.rows, header_buf); - readVarUInt(distributed_header.bytes, header_buf); - readStringBinary(distributed_header.block_header_string, header_buf); + address.replica_index = static_cast(replica_index); + make_connection(address); } - - if (header_buf.hasPendingData()) - { - NativeReader header_block_in(header_buf, distributed_header.revision); - distributed_header.block_header = header_block_in.read(); - if (!distributed_header.block_header) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, - "Cannot read header from the {} batch. Data was written with protocol version {}, current version: {}", - in.getFileName(), distributed_header.revision, DBMS_TCP_PROTOCOL_VERSION); - } - - if (header_buf.hasPendingData()) - { - readVarUInt(distributed_header.shard_num, header_buf); - readStringBinary(distributed_header.cluster, header_buf); - readStringBinary(distributed_header.distributed_table, header_buf); - readStringBinary(distributed_header.remote_table, header_buf); - } - - /// Add handling new data here, for example: - /// - /// if (header_buf.hasPendingData()) - /// readVarUInt(my_new_data, header_buf); - /// - /// And note that it is safe, because we have checksum and size for header. - - return distributed_header; } - - if (query_size == DBMS_DISTRIBUTED_SIGNATURE_HEADER_OLD_FORMAT) - { - distributed_header.insert_settings.read(in, SettingsWriteFormat::BINARY); - readStringBinary(distributed_header.insert_query, in); - return distributed_header; - } - - distributed_header.insert_query.resize(query_size); - in.readStrict(distributed_header.insert_query.data(), query_size); - - return distributed_header; + else + make_connection(address); } - /// 'remote_error' argument is used to decide whether some errors should be - /// ignored or not, in particular: - /// - /// - ATTEMPT_TO_READ_AFTER_EOF should not be ignored - /// if we receive it from remote (receiver), since: - /// - the sender will got ATTEMPT_TO_READ_AFTER_EOF when the client just go away, - /// i.e. server had been restarted - /// - since #18853 the file will be checked on the sender locally, and - /// if there is something wrong with the file itself, we will receive - /// ATTEMPT_TO_READ_AFTER_EOF not from the remote at first - /// and mark batch as broken. 
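// [Editor's note, not part of the patch] Nothing in this hunk is lost: the Batch
// struct and the helpers being deleted from DirectoryMonitor.cpp reappear further
// down in this same patch, mostly verbatim, as DistributedAsyncInsertBatch.{h,cpp}
// and DistributedAsyncInsertHelpers.{h,cpp}; isFileBrokenErrorCode is renamed to
// isDistributedSendBroken along the way.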
- bool isFileBrokenErrorCode(int code, bool remote_error) - { - return code == ErrorCodes::CHECKSUM_DOESNT_MATCH - || code == ErrorCodes::EMPTY_DATA_PASSED - || code == ErrorCodes::TOO_LARGE_SIZE_COMPRESSED - || code == ErrorCodes::CANNOT_READ_ALL_DATA - || code == ErrorCodes::UNKNOWN_CODEC - || code == ErrorCodes::CANNOT_DECOMPRESS - || code == ErrorCodes::DISTRIBUTED_BROKEN_BATCH_INFO - || code == ErrorCodes::DISTRIBUTED_BROKEN_BATCH_FILES - || (!remote_error && code == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF); - } + return pools; +} - /// Can the batch be split and send files from batch one-by-one instead? - bool isSplittableErrorCode(int code, bool remote) - { - return code == ErrorCodes::MEMORY_LIMIT_EXCEEDED - /// FunctionRange::max_elements and similar - || code == ErrorCodes::ARGUMENT_OUT_OF_BOUND - || code == ErrorCodes::TOO_MANY_PARTS - || code == ErrorCodes::TOO_MANY_BYTES - || code == ErrorCodes::TOO_MANY_ROWS_OR_BYTES - || code == ErrorCodes::TOO_MANY_PARTITIONS - || code == ErrorCodes::DISTRIBUTED_TOO_MANY_PENDING_BYTES - || code == ErrorCodes::DISTRIBUTED_BROKEN_BATCH_INFO - || isFileBrokenErrorCode(code, remote) - ; - } +uint64_t doubleToUInt64(double d) +{ + if (d >= static_cast(std::numeric_limits::max())) + return std::numeric_limits::max(); + return static_cast(d); +} - SyncGuardPtr getDirectorySyncGuard(bool dir_fsync, const DiskPtr & disk, const String & path) - { - if (dir_fsync) - return disk->getDirectorySyncGuard(path); - return nullptr; - } - - void writeAndConvert(RemoteInserter & remote, const DistributedAsyncInsertHeader & distributed_header, ReadBufferFromFile & in) - { - CompressedReadBuffer decompressing_in(in); - NativeReader block_in(decompressing_in, distributed_header.revision); - - while (Block block = block_in.read()) - { - auto converting_dag = ActionsDAG::makeConvertingActions( - block.cloneEmpty().getColumnsWithTypeAndName(), - remote.getHeader().getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Name); - - auto converting_actions = std::make_shared(std::move(converting_dag)); - converting_actions->execute(block); - remote.write(block); - } - } - - void writeRemoteConvert( - const DistributedAsyncInsertHeader & distributed_header, - RemoteInserter & remote, - bool compression_expected, - ReadBufferFromFile & in, - Poco::Logger * log) - { - if (!remote.getHeader()) - { - CheckingCompressedReadBuffer checking_in(in); - remote.writePrepared(checking_in); - return; - } - - /// This is old format, that does not have header for the block in the file header, - /// applying ConvertingTransform in this case is not a big overhead. - /// - /// Anyway we can get header only from the first block, which contain all rows anyway. - if (!distributed_header.block_header) - { - LOG_TRACE(log, "Processing batch {} with old format (no header)", in.getFileName()); - - writeAndConvert(remote, distributed_header, in); - return; - } - - if (!blocksHaveEqualStructure(distributed_header.block_header, remote.getHeader())) - { - LOG_WARNING(log, - "Structure does not match (remote: {}, local: {}), implicit conversion will be done", - remote.getHeader().dumpStructure(), distributed_header.block_header.dumpStructure()); - - writeAndConvert(remote, distributed_header, in); - return; - } - - /// If connection does not use compression, we have to uncompress the data. 
- if (!compression_expected) - { - writeAndConvert(remote, distributed_header, in); - return; - } - - if (distributed_header.revision != remote.getServerRevision()) - { - writeAndConvert(remote, distributed_header, in); - return; - } - - /// Otherwise write data as it was already prepared (more efficient path). - CheckingCompressedReadBuffer checking_in(in); - remote.writePrepared(checking_in); - } - - uint64_t doubleToUInt64(double d) - { - if (d >= static_cast(std::numeric_limits::max())) - return std::numeric_limits::max(); - return static_cast(d); - } } @@ -425,13 +200,15 @@ void StorageDistributedDirectoryMonitor::shutdownAndDropAllData() task_handle->deactivate(); } - auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path); + auto dir_sync_guard = getDirectorySyncGuard(relative_path); fs::remove_all(path); } void StorageDistributedDirectoryMonitor::run() { + constexpr const std::chrono::minutes decrease_error_count_period{5}; + std::lock_guard lock{mutex}; bool do_sleep = false; @@ -695,7 +472,7 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa thread_trace_context->root_span.addAttribute(std::current_exception()); e.addMessage(fmt::format("While sending {}", file_path)); - if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) + if (isDistributedSendBroken(e.code(), e.isRemoteException())) { markAsBroken(file_path); current_file.clear(); @@ -710,7 +487,7 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa throw; } - auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path); + auto dir_sync_guard = getDirectorySyncGuard(relative_path); markAsSend(file_path); current_file.clear(); LOG_TRACE(log, "Finished processing `{}` (took {} ms)", file_path, watch.elapsedMilliseconds()); @@ -750,244 +527,6 @@ struct StorageDistributedDirectoryMonitor::BatchHeader }; }; -struct StorageDistributedDirectoryMonitor::Batch -{ - size_t total_rows = 0; - size_t total_bytes = 0; - /// Does the batch had been created from the files in current_batch.txt? - bool recovered = false; - - StorageDistributedDirectoryMonitor & parent; - std::vector files; - - bool split_batch_on_failure = true; - bool fsync = false; - bool dir_fsync = false; - - explicit Batch(StorageDistributedDirectoryMonitor & parent_) - : parent(parent_) - , split_batch_on_failure(parent.split_batch_on_failure) - , fsync(parent.storage.getDistributedSettingsRef().fsync_after_insert) - , dir_fsync(parent.dir_fsync) - {} - - bool isEnoughSize() const - { - return (!parent.min_batched_block_size_rows && !parent.min_batched_block_size_bytes) - || (parent.min_batched_block_size_rows && total_rows >= parent.min_batched_block_size_rows) - || (parent.min_batched_block_size_bytes && total_bytes >= parent.min_batched_block_size_bytes); - } - - void send() - { - if (files.empty()) - return; - - CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend}; - - Stopwatch watch; - - if (!recovered) - { - /// For deduplication in Replicated tables to work, in case of error - /// we must try to re-send exactly the same batches. - /// So we save contents of the current batch into the current_batch_file_path file - /// and truncate it afterwards if all went well. 
- serialize(); - } - - bool batch_broken = false; - bool batch_marked_as_broken = false; - try - { - try - { - sendBatch(); - } - catch (const Exception & e) - { - if (split_batch_on_failure && files.size() > 1 && isSplittableErrorCode(e.code(), e.isRemoteException())) - { - tryLogCurrentException(parent.log, "Trying to split batch due to"); - sendSeparateFiles(); - } - else - throw; - } - } - catch (Exception & e) - { - if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) - { - tryLogCurrentException(parent.log, "Failed to send batch due to"); - batch_broken = true; - if (!e.isRemoteException() && e.code() == ErrorCodes::DISTRIBUTED_BROKEN_BATCH_FILES) - batch_marked_as_broken = true; - } - else - { - e.addMessage(fmt::format("While sending a batch of {} files, files: {}", files.size(), fmt::join(files, "\n"))); - throw; - } - } - - if (!batch_broken) - { - LOG_TRACE(parent.log, "Sent a batch of {} files (took {} ms).", files.size(), watch.elapsedMilliseconds()); - - auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, parent.disk, parent.relative_path); - for (const auto & file : files) - parent.markAsSend(file); - } - else if (!batch_marked_as_broken) - { - LOG_ERROR(parent.log, "Marking a batch of {} files as broken, files: {}", files.size(), fmt::join(files, "\n")); - - for (const auto & file : files) - parent.markAsBroken(file); - } - - files.clear(); - total_rows = 0; - total_bytes = 0; - recovered = false; - - fs::resize_file(parent.current_batch_file_path, 0); - } - - void serialize() - { - /// Temporary file is required for atomicity. - String tmp_file{parent.current_batch_file_path + ".tmp"}; - - auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, parent.disk, parent.relative_path); - if (fs::exists(tmp_file)) - LOG_ERROR(parent.log, "Temporary file {} exists. 
Unclean shutdown?", backQuote(tmp_file)); - - { - WriteBufferFromFile out{tmp_file, O_WRONLY | O_TRUNC | O_CREAT}; - writeText(out); - - out.finalize(); - if (fsync) - out.sync(); - } - - fs::rename(tmp_file, parent.current_batch_file_path); - } - - void deserialize() - { - ReadBufferFromFile in{parent.current_batch_file_path}; - readText(in); - } - -private: - void writeText(WriteBuffer & out) - { - for (const auto & file : files) - { - UInt64 file_index = parse(fs::path(file).stem()); - out << file_index << '\n'; - } - } - - void readText(ReadBuffer & in) - { - while (!in.eof()) - { - UInt64 idx; - in >> idx >> "\n"; - files.push_back(fs::absolute(fmt::format("{}/{}.bin", parent.path, idx)).string()); - } - - recovered = true; - } - - void sendBatch() - { - std::unique_ptr remote; - bool compression_expected = false; - - IConnectionPool::Entry connection; - - for (const auto & file : files) - { - ReadBufferFromFile in(file); - const auto & distributed_header = readDistributedAsyncInsertHeader(in, parent.log); - - OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__, - distributed_header.client_info.client_trace_context, - parent.storage.getContext()->getOpenTelemetrySpanLog()); - - if (!remote) - { - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(distributed_header.insert_settings); - connection = parent.pool->get(timeouts); - compression_expected = connection->getCompression() == Protocol::Compression::Enable; - - LOG_DEBUG(parent.log, "Sending a batch of {} files to {} ({} rows, {} bytes).", - files.size(), - connection->getDescription(), - formatReadableQuantity(total_rows), - formatReadableSizeWithBinarySuffix(total_bytes)); - - remote = std::make_unique(*connection, timeouts, - distributed_header.insert_query, - distributed_header.insert_settings, - distributed_header.client_info); - } - writeRemoteConvert(distributed_header, *remote, compression_expected, in, parent.log); - } - - if (remote) - remote->onFinish(); - } - - void sendSeparateFiles() - { - size_t broken_files = 0; - - for (const auto & file : files) - { - try - { - ReadBufferFromFile in(file); - const auto & distributed_header = readDistributedAsyncInsertHeader(in, parent.log); - - // this function is called in a separated thread, so we set up the trace context from the file - OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__, - distributed_header.client_info.client_trace_context, - parent.storage.getContext()->getOpenTelemetrySpanLog()); - - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(distributed_header.insert_settings); - auto connection = parent.pool->get(timeouts); - bool compression_expected = connection->getCompression() == Protocol::Compression::Enable; - - RemoteInserter remote(*connection, timeouts, - distributed_header.insert_query, - distributed_header.insert_settings, - distributed_header.client_info); - - writeRemoteConvert(distributed_header, remote, compression_expected, in, parent.log); - remote.onFinish(); - } - catch (Exception & e) - { - if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) - { - parent.markAsBroken(file); - ++broken_files; - } - } - } - - if (broken_files) - throw Exception(ErrorCodes::DISTRIBUTED_BROKEN_BATCH_FILES, - "Failed to send {} files", broken_files); - } -}; - bool StorageDistributedDirectoryMonitor::addFileAndSchedule(const std::string & file_path, size_t file_size, size_t ms) { /// NOTE: It is better not to throw in this case, since the file is already @@ -1024,15 +563,15 @@ void 
StorageDistributedDirectoryMonitor::processFilesWithBatching() { LOG_DEBUG(log, "Restoring the batch"); - Batch batch(*this); + DistributedAsyncInsertBatch batch(*this); batch.deserialize(); batch.send(); - auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path); + auto dir_sync_guard = getDirectorySyncGuard(relative_path); fs::remove(current_batch_file_path); } - std::unordered_map header_to_batch; + std::unordered_map header_to_batch; std::string file_path; while (pending_files.tryPop(file_path)) @@ -1075,7 +614,7 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching() } catch (const Exception & e) { - if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) + if (isDistributedSendBroken(e.code(), e.isRemoteException())) { markAsBroken(file_path); tryLogCurrentException(log, "File is marked broken due to"); @@ -1091,7 +630,7 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching() std::move(distributed_header.client_info), std::move(header) ); - Batch & batch = header_to_batch.try_emplace(batch_header, *this).first->second; + DistributedAsyncInsertBatch & batch = header_to_batch.try_emplace(batch_header, *this).first->second; batch.files.push_back(file_path); batch.total_rows += total_rows; @@ -1105,12 +644,12 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching() for (auto & kv : header_to_batch) { - Batch & batch = kv.second; + DistributedAsyncInsertBatch & batch = kv.second; batch.send(); } { - auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path); + auto dir_sync_guard = getDirectorySyncGuard(relative_path); /// current_batch.txt will not exist if there was no send /// (this is the case when all batches that was pending has been marked as pending) @@ -1123,8 +662,8 @@ void StorageDistributedDirectoryMonitor::markAsBroken(const std::string & file_p { const String & broken_file_path = fs::path(broken_path) / fs::path(file_path).filename(); - auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path); - auto broken_dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, broken_relative_path); + auto dir_sync_guard = getDirectorySyncGuard(relative_path); + auto broken_dir_sync_guard = getDirectorySyncGuard(broken_relative_path); { std::lock_guard status_lock(status_mutex); @@ -1158,6 +697,13 @@ void StorageDistributedDirectoryMonitor::markAsSend(const std::string & file_pat fs::remove(file_path); } +SyncGuardPtr StorageDistributedDirectoryMonitor::getDirectorySyncGuard(const std::string & dir_path) +{ + if (dir_fsync) + return disk->getDirectorySyncGuard(dir_path); + return nullptr; +} + std::string StorageDistributedDirectoryMonitor::getLoggerName() const { return storage.getStorageID().getFullTableName() + ".DirectoryMonitor." 
+ disk->getName(); diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DirectoryMonitor.h index 9b1596d45e3..8515f5a16a1 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DirectoryMonitor.h @@ -3,11 +3,11 @@ #include #include #include - +#include +#include #include #include #include -#include namespace CurrentMetrics { class Increment; } @@ -32,6 +32,8 @@ class ISource; */ class StorageDistributedDirectoryMonitor { + friend class DistributedAsyncInsertBatch; + public: StorageDistributedDirectoryMonitor( StorageDistributed & storage_, @@ -92,6 +94,8 @@ private: void markAsBroken(const std::string & file_path); void markAsSend(const std::string & file_path); + SyncGuardPtr getDirectorySyncGuard(const std::string & path); + std::string getLoggerName() const; StorageDistributed & storage; diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp new file mode 100644 index 00000000000..5f9e78a72e2 --- /dev/null +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -0,0 +1,277 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + +namespace fs = std::filesystem; + +} + +namespace CurrentMetrics +{ + extern const Metric DistributedSend; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int MEMORY_LIMIT_EXCEEDED; + extern const int DISTRIBUTED_BROKEN_BATCH_INFO; + extern const int DISTRIBUTED_BROKEN_BATCH_FILES; + extern const int TOO_MANY_PARTS; + extern const int TOO_MANY_BYTES; + extern const int TOO_MANY_ROWS_OR_BYTES; + extern const int TOO_MANY_PARTITIONS; + extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES; + extern const int ARGUMENT_OUT_OF_BOUND; +} + +/// Can the batch be split and send files from batch one-by-one instead? +bool isSplittableErrorCode(int code, bool remote) +{ + return code == ErrorCodes::MEMORY_LIMIT_EXCEEDED + /// FunctionRange::max_elements and similar + || code == ErrorCodes::ARGUMENT_OUT_OF_BOUND + || code == ErrorCodes::TOO_MANY_PARTS + || code == ErrorCodes::TOO_MANY_BYTES + || code == ErrorCodes::TOO_MANY_ROWS_OR_BYTES + || code == ErrorCodes::TOO_MANY_PARTITIONS + || code == ErrorCodes::DISTRIBUTED_TOO_MANY_PENDING_BYTES + || code == ErrorCodes::DISTRIBUTED_BROKEN_BATCH_INFO + || isDistributedSendBroken(code, remote) + ; +} + +DistributedAsyncInsertBatch::DistributedAsyncInsertBatch(StorageDistributedDirectoryMonitor & parent_) + : parent(parent_) + , split_batch_on_failure(parent.split_batch_on_failure) + , fsync(parent.storage.getDistributedSettingsRef().fsync_after_insert) + , dir_fsync(parent.dir_fsync) +{} + +bool DistributedAsyncInsertBatch::isEnoughSize() const +{ + return (!parent.min_batched_block_size_rows && !parent.min_batched_block_size_bytes) + || (parent.min_batched_block_size_rows && total_rows >= parent.min_batched_block_size_rows) + || (parent.min_batched_block_size_bytes && total_bytes >= parent.min_batched_block_size_bytes); +} + +void DistributedAsyncInsertBatch::send() +{ + if (files.empty()) + return; + + CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend}; + + Stopwatch watch; + + if (!recovered) + { + /// For deduplication in Replicated tables to work, in case of error + /// we must try to re-send exactly the same batches. + /// So we save contents of the current batch into the current_batch_file_path file + /// and truncate it afterwards if all went well. 
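// [Editor's note, not part of the patch] serialize() below persists the batch as
// plain text: one .bin file index per line, written to a ".tmp" file first and then
// renamed over current_batch.txt for atomicity; deserialize()/readText() rebuild the
// file list as "<queue path>/<index>.bin". A standalone sketch of that round-trip
// (simplified: plain iostreams instead of ClickHouse Read/WriteBuffer, no fsync,
// illustrative names):

#include <cstdint>
#include <filesystem>
#include <fstream>
#include <string>
#include <vector>

namespace batch_file_sketch
{

void saveBatch(const std::string & batch_file, const std::vector<uint64_t> & file_indices)
{
    const std::string tmp = batch_file + ".tmp";
    {
        std::ofstream out(tmp, std::ios::trunc);
        for (auto idx : file_indices)
            out << idx << '\n';                   // one file index per line
    }
    std::filesystem::rename(tmp, batch_file);     // atomic publish, like serialize()
}

std::vector<std::string> recoverBatch(const std::string & batch_file, const std::string & queue_path)
{
    std::vector<std::string> files;
    std::ifstream in(batch_file);
    for (uint64_t idx; in >> idx;)                // like readText(): index -> "<path>/<idx>.bin"
        files.push_back((std::filesystem::path(queue_path) / (std::to_string(idx) + ".bin")).string());
    return files;
}

}
// [End of editor's note]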
+ serialize(); + } + + bool batch_broken = false; + bool batch_marked_as_broken = false; + try + { + try + { + sendBatch(); + } + catch (const Exception & e) + { + if (split_batch_on_failure && files.size() > 1 && isSplittableErrorCode(e.code(), e.isRemoteException())) + { + tryLogCurrentException(parent.log, "Trying to split batch due to"); + sendSeparateFiles(); + } + else + throw; + } + } + catch (Exception & e) + { + if (isDistributedSendBroken(e.code(), e.isRemoteException())) + { + tryLogCurrentException(parent.log, "Failed to send batch due to"); + batch_broken = true; + if (!e.isRemoteException() && e.code() == ErrorCodes::DISTRIBUTED_BROKEN_BATCH_FILES) + batch_marked_as_broken = true; + } + else + { + e.addMessage(fmt::format("While sending a batch of {} files, files: {}", files.size(), fmt::join(files, "\n"))); + throw; + } + } + + if (!batch_broken) + { + LOG_TRACE(parent.log, "Sent a batch of {} files (took {} ms).", files.size(), watch.elapsedMilliseconds()); + + auto dir_sync_guard = parent.getDirectorySyncGuard(parent.relative_path); + for (const auto & file : files) + parent.markAsSend(file); + } + else if (!batch_marked_as_broken) + { + LOG_ERROR(parent.log, "Marking a batch of {} files as broken, files: {}", files.size(), fmt::join(files, "\n")); + + for (const auto & file : files) + parent.markAsBroken(file); + } + + files.clear(); + total_rows = 0; + total_bytes = 0; + recovered = false; + + fs::resize_file(parent.current_batch_file_path, 0); +} + +void DistributedAsyncInsertBatch::serialize() +{ + /// Temporary file is required for atomicity. + String tmp_file{parent.current_batch_file_path + ".tmp"}; + + auto dir_sync_guard = parent.getDirectorySyncGuard(parent.relative_path); + if (fs::exists(tmp_file)) + LOG_ERROR(parent.log, "Temporary file {} exists. 
Unclean shutdown?", backQuote(tmp_file)); + + { + WriteBufferFromFile out{tmp_file, O_WRONLY | O_TRUNC | O_CREAT}; + writeText(out); + + out.finalize(); + if (fsync) + out.sync(); + } + + fs::rename(tmp_file, parent.current_batch_file_path); +} + +void DistributedAsyncInsertBatch::deserialize() +{ + ReadBufferFromFile in{parent.current_batch_file_path}; + readText(in); +} + +void DistributedAsyncInsertBatch::writeText(WriteBuffer & out) +{ + for (const auto & file : files) + { + UInt64 file_index = parse(fs::path(file).stem()); + out << file_index << '\n'; + } +} + +void DistributedAsyncInsertBatch::readText(ReadBuffer & in) +{ + while (!in.eof()) + { + UInt64 idx; + in >> idx >> "\n"; + files.push_back(fs::absolute(fmt::format("{}/{}.bin", parent.path, idx)).string()); + } + + recovered = true; +} + +void DistributedAsyncInsertBatch::sendBatch() +{ + std::unique_ptr remote; + bool compression_expected = false; + + IConnectionPool::Entry connection; + + for (const auto & file : files) + { + ReadBufferFromFile in(file); + const auto & distributed_header = readDistributedAsyncInsertHeader(in, parent.log); + + OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__, + distributed_header.client_info.client_trace_context, + parent.storage.getContext()->getOpenTelemetrySpanLog()); + + if (!remote) + { + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(distributed_header.insert_settings); + connection = parent.pool->get(timeouts); + compression_expected = connection->getCompression() == Protocol::Compression::Enable; + + LOG_DEBUG(parent.log, "Sending a batch of {} files to {} ({} rows, {} bytes).", + files.size(), + connection->getDescription(), + formatReadableQuantity(total_rows), + formatReadableSizeWithBinarySuffix(total_bytes)); + + remote = std::make_unique(*connection, timeouts, + distributed_header.insert_query, + distributed_header.insert_settings, + distributed_header.client_info); + } + writeRemoteConvert(distributed_header, *remote, compression_expected, in, parent.log); + } + + if (remote) + remote->onFinish(); +} + +void DistributedAsyncInsertBatch::sendSeparateFiles() +{ + size_t broken_files = 0; + + for (const auto & file : files) + { + try + { + ReadBufferFromFile in(file); + const auto & distributed_header = readDistributedAsyncInsertHeader(in, parent.log); + + // This function is called in a separated thread, so we set up the trace context from the file + OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__, + distributed_header.client_info.client_trace_context, + parent.storage.getContext()->getOpenTelemetrySpanLog()); + + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(distributed_header.insert_settings); + auto connection = parent.pool->get(timeouts); + bool compression_expected = connection->getCompression() == Protocol::Compression::Enable; + + RemoteInserter remote(*connection, timeouts, + distributed_header.insert_query, + distributed_header.insert_settings, + distributed_header.client_info); + + writeRemoteConvert(distributed_header, remote, compression_expected, in, parent.log); + remote.onFinish(); + } + catch (Exception & e) + { + if (isDistributedSendBroken(e.code(), e.isRemoteException())) + { + parent.markAsBroken(file); + ++broken_files; + } + } + } + + if (broken_files) + throw Exception(ErrorCodes::DISTRIBUTED_BROKEN_BATCH_FILES, + "Failed to send {} files", broken_files); +} + +} diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.h 
b/src/Storages/Distributed/DistributedAsyncInsertBatch.h new file mode 100644 index 00000000000..8b3320155fa --- /dev/null +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class StorageDistributedDirectoryMonitor; +class WriteBuffer; +class ReadBuffer; + +class DistributedAsyncInsertBatch +{ +public: + explicit DistributedAsyncInsertBatch(StorageDistributedDirectoryMonitor & parent_); + + bool isEnoughSize() const; + void send(); + + void serialize(); + void deserialize(); + + size_t total_rows = 0; + size_t total_bytes = 0; + std::vector files; + +private: + void writeText(WriteBuffer & out); + void readText(ReadBuffer & in); + void sendBatch(); + void sendSeparateFiles(); + + StorageDistributedDirectoryMonitor & parent; + + /// Does the batch had been created from the files in current_batch.txt? + bool recovered = false; + + bool split_batch_on_failure = true; + bool fsync = false; + bool dir_fsync = false; +}; + +} diff --git a/src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp b/src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp new file mode 100644 index 00000000000..98073ba1e08 --- /dev/null +++ b/src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_READ_ALL_DATA; + extern const int UNKNOWN_CODEC; + extern const int CANNOT_DECOMPRESS; + extern const int CHECKSUM_DOESNT_MATCH; + extern const int TOO_LARGE_SIZE_COMPRESSED; + extern const int ATTEMPT_TO_READ_AFTER_EOF; + extern const int EMPTY_DATA_PASSED; + extern const int DISTRIBUTED_BROKEN_BATCH_INFO; + extern const int DISTRIBUTED_BROKEN_BATCH_FILES; +} + +/// 'remote_error' argument is used to decide whether some errors should be +/// ignored or not, in particular: +/// +/// - ATTEMPT_TO_READ_AFTER_EOF should not be ignored +/// if we receive it from remote (receiver), since: +/// - the sender will got ATTEMPT_TO_READ_AFTER_EOF when the client just go away, +/// i.e. server had been restarted +/// - since #18853 the file will be checked on the sender locally, and +/// if there is something wrong with the file itself, we will receive +/// ATTEMPT_TO_READ_AFTER_EOF not from the remote at first +/// and mark batch as broken. 
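// [Editor's note, not part of the patch] Two related predicates come out of this
// refactoring: isDistributedSendBroken() below answers "is this .bin file itself
// unusable?" and leads to markAsBroken(), which moves the file aside into broken/;
// isSplittableErrorCode() in DistributedAsyncInsertBatch.cpp additionally covers
// resource-limit errors (MEMORY_LIMIT_EXCEEDED, TOO_MANY_PARTS, ...) and only causes
// the batch to be retried file by file via sendSeparateFiles().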
+bool isDistributedSendBroken(int code, bool remote_error) +{ + return code == ErrorCodes::CHECKSUM_DOESNT_MATCH + || code == ErrorCodes::EMPTY_DATA_PASSED + || code == ErrorCodes::TOO_LARGE_SIZE_COMPRESSED + || code == ErrorCodes::CANNOT_READ_ALL_DATA + || code == ErrorCodes::UNKNOWN_CODEC + || code == ErrorCodes::CANNOT_DECOMPRESS + || code == ErrorCodes::DISTRIBUTED_BROKEN_BATCH_INFO + || code == ErrorCodes::DISTRIBUTED_BROKEN_BATCH_FILES + || (!remote_error && code == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF); +} + +void writeAndConvert(RemoteInserter & remote, const DistributedAsyncInsertHeader & distributed_header, ReadBufferFromFile & in) +{ + CompressedReadBuffer decompressing_in(in); + NativeReader block_in(decompressing_in, distributed_header.revision); + + while (Block block = block_in.read()) + { + auto converting_dag = ActionsDAG::makeConvertingActions( + block.cloneEmpty().getColumnsWithTypeAndName(), + remote.getHeader().getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name); + + auto converting_actions = std::make_shared(std::move(converting_dag)); + converting_actions->execute(block); + remote.write(block); + } +} + +void writeRemoteConvert( + const DistributedAsyncInsertHeader & distributed_header, + RemoteInserter & remote, + bool compression_expected, + ReadBufferFromFile & in, + Poco::Logger * log) +{ + if (!remote.getHeader()) + { + CheckingCompressedReadBuffer checking_in(in); + remote.writePrepared(checking_in); + return; + } + + /// This is old format, that does not have header for the block in the file header, + /// applying ConvertingTransform in this case is not a big overhead. + /// + /// Anyway we can get header only from the first block, which contain all rows anyway. + if (!distributed_header.block_header) + { + LOG_TRACE(log, "Processing batch {} with old format (no header)", in.getFileName()); + + writeAndConvert(remote, distributed_header, in); + return; + } + + if (!blocksHaveEqualStructure(distributed_header.block_header, remote.getHeader())) + { + LOG_WARNING(log, + "Structure does not match (remote: {}, local: {}), implicit conversion will be done", + remote.getHeader().dumpStructure(), distributed_header.block_header.dumpStructure()); + + writeAndConvert(remote, distributed_header, in); + return; + } + + /// If connection does not use compression, we have to uncompress the data. + if (!compression_expected) + { + writeAndConvert(remote, distributed_header, in); + return; + } + + if (distributed_header.revision != remote.getServerRevision()) + { + writeAndConvert(remote, distributed_header, in); + return; + } + + /// Otherwise write data as it was already prepared (more efficient path). + CheckingCompressedReadBuffer checking_in(in); + remote.writePrepared(checking_in); +} + +} diff --git a/src/Storages/Distributed/DistributedAsyncInsertHelpers.h b/src/Storages/Distributed/DistributedAsyncInsertHelpers.h new file mode 100644 index 00000000000..9543450418c --- /dev/null +++ b/src/Storages/Distributed/DistributedAsyncInsertHelpers.h @@ -0,0 +1,35 @@ +#pragma once + +namespace Poco +{ +class Logger; +} + +namespace DB +{ + +struct DistributedAsyncInsertHeader; +class ReadBufferFromFile; +class RemoteInserter; + +/// 'remote_error' argument is used to decide whether some errors should be +/// ignored or not, in particular: +/// +/// - ATTEMPT_TO_READ_AFTER_EOF should not be ignored +/// if we receive it from remote (receiver), since: +/// - the sender will got ATTEMPT_TO_READ_AFTER_EOF when the client just go away, +/// i.e. 
server had been restarted +/// - since #18853 the file will be checked on the sender locally, and +/// if there is something wrong with the file itself, we will receive +/// ATTEMPT_TO_READ_AFTER_EOF not from the remote at first +/// and mark batch as broken. +bool isDistributedSendBroken(int code, bool remote_error); + +void writeRemoteConvert( + const DistributedAsyncInsertHeader & distributed_header, + RemoteInserter & remote, + bool compression_expected, + ReadBufferFromFile & in, + Poco::Logger * log); + +} diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 25a752fe795..432aee047e8 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -38,6 +38,7 @@ using ExpressionActionsPtr = std::shared_ptr; class StorageDistributed final : public IStorage, WithContext { friend class DistributedSink; + friend class DistributedAsyncInsertBatch; friend class StorageDistributedDirectoryMonitor; friend class StorageSystemDistributionQueue; From b5434eac3b189a6c588dbc17fba43e180809ed07 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 18:55:14 +0100 Subject: [PATCH 114/333] Rename StorageDistributedDirectoryMonitor to DistributedAsyncInsertDirectoryQueue Since #44922 it is not a directory monitor anymore. v2: Remove unused error codes v3: Contains some header fixes due to conflicts with master Signed-off-by: Azat Khuzhin --- src/Loggers/OwnSplitChannel.cpp | 2 +- .../DistributedAsyncInsertBatch.cpp | 4 +- .../Distributed/DistributedAsyncInsertBatch.h | 6 +- ... DistributedAsyncInsertDirectoryQueue.cpp} | 66 +++++++------------ ...=> DistributedAsyncInsertDirectoryQueue.h} | 25 +++++-- src/Storages/Distributed/DistributedSink.cpp | 4 +- src/Storages/StorageDistributed.cpp | 22 +++---- src/Storages/StorageDistributed.h | 14 ++-- .../System/StorageSystemDistributionQueue.cpp | 3 +- src/TableFunctions/ITableFunctionFileLike.cpp | 1 - ...etry_insert_on_distributed_table.reference | 4 +- 11 files changed, 71 insertions(+), 80 deletions(-) rename src/Storages/Distributed/{DirectoryMonitor.cpp => DistributedAsyncInsertDirectoryQueue.cpp} (90%) rename src/Storages/Distributed/{DirectoryMonitor.h => DistributedAsyncInsertDirectoryQueue.h} (81%) diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index 7974d5212e1..7a7456823ed 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -56,7 +56,7 @@ void OwnSplitChannel::tryLogSplit(const Poco::Message & msg) /// breaking some functionality because of unexpected "File not /// found" (or similar) error. /// - /// For example StorageDistributedDirectoryMonitor will mark batch + /// For example DistributedAsyncInsertDirectoryQueue will mark batch /// as broken, some MergeTree code can also be affected. 
/// /// Also note, that we cannot log the exception here, since this diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index 5f9e78a72e2..80350600caa 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -52,7 +52,7 @@ bool isSplittableErrorCode(int code, bool remote) ; } -DistributedAsyncInsertBatch::DistributedAsyncInsertBatch(StorageDistributedDirectoryMonitor & parent_) +DistributedAsyncInsertBatch::DistributedAsyncInsertBatch(DistributedAsyncInsertDirectoryQueue & parent_) : parent(parent_) , split_batch_on_failure(parent.split_batch_on_failure) , fsync(parent.storage.getDistributedSettingsRef().fsync_after_insert) diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.h b/src/Storages/Distributed/DistributedAsyncInsertBatch.h index 8b3320155fa..867a0de89fa 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.h +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.h @@ -6,14 +6,14 @@ namespace DB { -class StorageDistributedDirectoryMonitor; +class DistributedAsyncInsertDirectoryQueue; class WriteBuffer; class ReadBuffer; class DistributedAsyncInsertBatch { public: - explicit DistributedAsyncInsertBatch(StorageDistributedDirectoryMonitor & parent_); + explicit DistributedAsyncInsertBatch(DistributedAsyncInsertDirectoryQueue & parent_); bool isEnoughSize() const; void send(); @@ -31,7 +31,7 @@ private: void sendBatch(); void sendSeparateFiles(); - StorageDistributedDirectoryMonitor & parent; + DistributedAsyncInsertDirectoryQueue & parent; /// Does the batch had been created from the files in current_batch.txt? 
bool recovered = false; diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp similarity index 90% rename from src/Storages/Distributed/DirectoryMonitor.cpp rename to src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index d5091003cb3..2d25c1822d8 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -22,19 +21,9 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include #include #include #include @@ -56,15 +45,6 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_FILE_NAME; - extern const int MEMORY_LIMIT_EXCEEDED; - extern const int DISTRIBUTED_BROKEN_BATCH_INFO; - extern const int DISTRIBUTED_BROKEN_BATCH_FILES; - extern const int TOO_MANY_PARTS; - extern const int TOO_MANY_BYTES; - extern const int TOO_MANY_ROWS_OR_BYTES; - extern const int TOO_MANY_PARTITIONS; - extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES; - extern const int ARGUMENT_OUT_OF_BOUND; extern const int LOGICAL_ERROR; } @@ -132,7 +112,7 @@ uint64_t doubleToUInt64(double d) } -StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor( +DistributedAsyncInsertDirectoryQueue::DistributedAsyncInsertDirectoryQueue( StorageDistributed & storage_, const DiskPtr & disk_, const std::string & relative_path_, @@ -172,7 +152,7 @@ StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor( } -StorageDistributedDirectoryMonitor::~StorageDistributedDirectoryMonitor() +DistributedAsyncInsertDirectoryQueue::~DistributedAsyncInsertDirectoryQueue() { if (!pending_files.isFinished()) { @@ -181,7 +161,7 @@ StorageDistributedDirectoryMonitor::~StorageDistributedDirectoryMonitor() } } -void StorageDistributedDirectoryMonitor::flushAllData() +void DistributedAsyncInsertDirectoryQueue::flushAllData() { if (pending_files.isFinished()) return; @@ -192,7 +172,7 @@ void StorageDistributedDirectoryMonitor::flushAllData() processFiles(); } -void StorageDistributedDirectoryMonitor::shutdownAndDropAllData() +void DistributedAsyncInsertDirectoryQueue::shutdownAndDropAllData() { if (!pending_files.isFinished()) { @@ -205,7 +185,7 @@ void StorageDistributedDirectoryMonitor::shutdownAndDropAllData() } -void StorageDistributedDirectoryMonitor::run() +void DistributedAsyncInsertDirectoryQueue::run() { constexpr const std::chrono::minutes decrease_error_count_period{5}; @@ -271,7 +251,7 @@ void StorageDistributedDirectoryMonitor::run() } -ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::string & name, const StorageDistributed & storage) +ConnectionPoolPtr DistributedAsyncInsertDirectoryQueue::createPool(const std::string & name, const StorageDistributed & storage) { const auto pool_factory = [&storage, &name] (const Cluster::Address & address) -> ConnectionPoolPtr { @@ -344,18 +324,18 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri settings.distributed_replica_error_cap); } -bool StorageDistributedDirectoryMonitor::hasPendingFiles() const +bool DistributedAsyncInsertDirectoryQueue::hasPendingFiles() const { return fs::exists(current_batch_file_path) || !current_file.empty() || !pending_files.empty(); } 
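// [Editor's note, not part of the patch] The surrounding hunks are the mechanical
// class/file rename from StorageDistributedDirectoryMonitor to
// DistributedAsyncInsertDirectoryQueue (PATCH 114); behaviour is unchanged, and
// getLoggerName() further down still builds the channel name with the old
// ".DirectoryMonitor." suffix, so log channel names are not affected by the rename.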
-void StorageDistributedDirectoryMonitor::addFile(const std::string & file_path) +void DistributedAsyncInsertDirectoryQueue::addFile(const std::string & file_path) { if (!pending_files.push(fs::absolute(file_path).string())) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot schedule a file '{}'", file_path); } -void StorageDistributedDirectoryMonitor::initializeFilesFromDisk() +void DistributedAsyncInsertDirectoryQueue::initializeFilesFromDisk() { /// NOTE: This method does not requires to hold status_mutex, hence, no TSA /// annotations in the header file. @@ -413,7 +393,7 @@ void StorageDistributedDirectoryMonitor::initializeFilesFromDisk() status.broken_bytes_count = broken_bytes_count; } } -void StorageDistributedDirectoryMonitor::processFiles() +void DistributedAsyncInsertDirectoryQueue::processFiles() { if (should_batch_inserts) processFilesWithBatching(); @@ -428,7 +408,7 @@ void StorageDistributedDirectoryMonitor::processFiles() } } -void StorageDistributedDirectoryMonitor::processFile(const std::string & file_path) +void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_path) { OpenTelemetry::TracingContextHolderPtr thread_trace_context; @@ -493,7 +473,7 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa LOG_TRACE(log, "Finished processing `{}` (took {} ms)", file_path, watch.elapsedMilliseconds()); } -struct StorageDistributedDirectoryMonitor::BatchHeader +struct DistributedAsyncInsertDirectoryQueue::BatchHeader { Settings settings; String query; @@ -527,7 +507,7 @@ struct StorageDistributedDirectoryMonitor::BatchHeader }; }; -bool StorageDistributedDirectoryMonitor::addFileAndSchedule(const std::string & file_path, size_t file_size, size_t ms) +bool DistributedAsyncInsertDirectoryQueue::addFileAndSchedule(const std::string & file_path, size_t file_size, size_t ms) { /// NOTE: It is better not to throw in this case, since the file is already /// on disk (see DistributedSink), and it will be processed next time. @@ -549,14 +529,14 @@ bool StorageDistributedDirectoryMonitor::addFileAndSchedule(const std::string & return task_handle->scheduleAfter(ms, false); } -StorageDistributedDirectoryMonitor::Status StorageDistributedDirectoryMonitor::getStatus() +DistributedAsyncInsertDirectoryQueue::Status DistributedAsyncInsertDirectoryQueue::getStatus() { std::lock_guard status_lock(status_mutex); Status current_status{status, path, monitor_blocker.isCancelled()}; return current_status; } -void StorageDistributedDirectoryMonitor::processFilesWithBatching() +void DistributedAsyncInsertDirectoryQueue::processFilesWithBatching() { /// Possibly, we failed to send a batch on the previous iteration. Try to send exactly the same batch. 
if (fs::exists(current_batch_file_path)) @@ -658,7 +638,7 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching() } } -void StorageDistributedDirectoryMonitor::markAsBroken(const std::string & file_path) +void DistributedAsyncInsertDirectoryQueue::markAsBroken(const std::string & file_path) { const String & broken_file_path = fs::path(broken_path) / fs::path(file_path).filename(); @@ -683,7 +663,7 @@ void StorageDistributedDirectoryMonitor::markAsBroken(const std::string & file_p LOG_ERROR(log, "Renamed `{}` to `{}`", file_path, broken_file_path); } -void StorageDistributedDirectoryMonitor::markAsSend(const std::string & file_path) +void DistributedAsyncInsertDirectoryQueue::markAsSend(const std::string & file_path) { size_t file_size = fs::file_size(file_path); @@ -697,19 +677,19 @@ void StorageDistributedDirectoryMonitor::markAsSend(const std::string & file_pat fs::remove(file_path); } -SyncGuardPtr StorageDistributedDirectoryMonitor::getDirectorySyncGuard(const std::string & dir_path) +SyncGuardPtr DistributedAsyncInsertDirectoryQueue::getDirectorySyncGuard(const std::string & dir_path) { if (dir_fsync) return disk->getDirectorySyncGuard(dir_path); return nullptr; } -std::string StorageDistributedDirectoryMonitor::getLoggerName() const +std::string DistributedAsyncInsertDirectoryQueue::getLoggerName() const { return storage.getStorageID().getFullTableName() + ".DirectoryMonitor." + disk->getName(); } -void StorageDistributedDirectoryMonitor::updatePath(const std::string & new_relative_path) +void DistributedAsyncInsertDirectoryQueue::updatePath(const std::string & new_relative_path) { task_handle->deactivate(); std::lock_guard lock{mutex}; diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h similarity index 81% rename from src/Storages/Distributed/DirectoryMonitor.h rename to src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h index 8515f5a16a1..90e3d563b4b 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h @@ -27,15 +27,28 @@ using ProcessorPtr = std::shared_ptr; class ISource; -/** Details of StorageDistributed. - * This type is not designed for standalone use. - */ -class StorageDistributedDirectoryMonitor +/** Queue for async INSERT Into Distributed engine (insert_distributed_sync=0). + * + * Files are added from two places: + * - from filesystem at startup (StorageDistributed::startup()) + * - on INSERT via DistributedSink + * + * Later, in background, those files will be send to the remote nodes. + * + * The behaviour of this queue can be configured via the following settings: + * - distributed_directory_monitor_batch_inserts + * - distributed_directory_monitor_split_batch_on_failure + * - distributed_directory_monitor_sleep_time_ms + * - distributed_directory_monitor_max_sleep_time_ms + * NOTE: It worth to rename the settings too + * ("directory_monitor" in settings looks too internal). 
+ */ +class DistributedAsyncInsertDirectoryQueue { friend class DistributedAsyncInsertBatch; public: - StorageDistributedDirectoryMonitor( + DistributedAsyncInsertDirectoryQueue( StorageDistributed & storage_, const DiskPtr & disk_, const std::string & relative_path_, @@ -44,7 +57,7 @@ public: BackgroundSchedulePool & bg_pool, bool initialize_from_disk); - ~StorageDistributedDirectoryMonitor(); + ~DistributedAsyncInsertDirectoryQueue(); static ConnectionPoolPtr createPool(const std::string & name, const StorageDistributed & storage); diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index c9c235596db..38aa26fbe0c 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -834,7 +834,7 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const const auto & dir_name = dir_names[i]; const auto & bin_file = bin_files[i]; - auto & directory_monitor = storage.requireDirectoryMonitor(disk, dir_name, /* startup= */ false); + auto & directory_monitor = storage.getDirectoryQueue(disk, dir_name, /* startup= */ false); directory_monitor.addFileAndSchedule(bin_file, file_size, sleep_ms.totalMilliseconds()); } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a38beef983c..d95c703216e 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1094,7 +1094,7 @@ void StorageDistributed::initializeFromDisk() { pool.scheduleOrThrowOnError([&]() { - createDirectoryMonitors(disk); + initializeDirectoryQueuesForDisk(disk); }); } pool.wait(); @@ -1133,7 +1133,7 @@ void StorageDistributed::shutdown() void StorageDistributed::drop() { // Some INSERT in-between shutdown() and drop() can call - // requireDirectoryMonitor() again, so call shutdown() to clear them, but + // getDirectoryQueue() again, so call shutdown() to clear them, but // when the drop() (this function) executed none of INSERT is allowed in // parallel. 
// @@ -1196,7 +1196,7 @@ StoragePolicyPtr StorageDistributed::getStoragePolicy() const return storage_policy; } -void StorageDistributed::createDirectoryMonitors(const DiskPtr & disk) +void StorageDistributed::initializeDirectoryQueuesForDisk(const DiskPtr & disk) { const std::string path(disk->getPath() + relative_data_path); fs::create_directories(path); @@ -1225,14 +1225,14 @@ void StorageDistributed::createDirectoryMonitors(const DiskPtr & disk) } else { - requireDirectoryMonitor(disk, dir_path.filename().string(), /* startup= */ true); + getDirectoryQueue(disk, dir_path.filename().string(), /* startup= */ true); } } } } -StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor(const DiskPtr & disk, const std::string & name, bool startup) +DistributedAsyncInsertDirectoryQueue & StorageDistributed::getDirectoryQueue(const DiskPtr & disk, const std::string & name, bool startup) { const std::string & disk_path = disk->getPath(); const std::string key(disk_path + name); @@ -1241,8 +1241,8 @@ StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor( auto & node_data = cluster_nodes_data[key]; if (!node_data.directory_monitor) { - node_data.connection_pool = StorageDistributedDirectoryMonitor::createPool(name, *this); - node_data.directory_monitor = std::make_unique( + node_data.connection_pool = DistributedAsyncInsertDirectoryQueue::createPool(name, *this); + node_data.directory_monitor = std::make_unique( *this, disk, relative_data_path + name, node_data.connection_pool, monitors_blocker, @@ -1252,9 +1252,9 @@ StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor( return *node_data.directory_monitor; } -std::vector StorageDistributed::getDirectoryMonitorsStatuses() const +std::vector StorageDistributed::getDirectoryQueueStatuses() const { - std::vector statuses; + std::vector statuses; std::lock_guard lock(cluster_nodes_mutex); statuses.reserve(cluster_nodes_data.size()); for (const auto & node : cluster_nodes_data) @@ -1265,7 +1265,7 @@ std::vector StorageDistributed::getD std::optional StorageDistributed::totalBytes(const Settings &) const { UInt64 total_bytes = 0; - for (const auto & status : getDirectoryMonitorsStatuses()) + for (const auto & status : getDirectoryQueueStatuses()) total_bytes += status.bytes_count; return total_bytes; } @@ -1426,7 +1426,7 @@ void StorageDistributed::flushClusterNodesAllData(ContextPtr local_context) /// Sync SYSTEM FLUSH DISTRIBUTED with TRUNCATE auto table_lock = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); - std::vector> directory_monitors; + std::vector> directory_monitors; { std::lock_guard lock(cluster_nodes_mutex); diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 432aee047e8..df4c16f0f67 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -39,7 +39,7 @@ class StorageDistributed final : public IStorage, WithContext { friend class DistributedSink; friend class DistributedAsyncInsertBatch; - friend class StorageDistributedDirectoryMonitor; + friend class DistributedAsyncInsertDirectoryQueue; friend class StorageSystemDistributionQueue; public: @@ -165,15 +165,15 @@ private: const String & getRelativeDataPath() const { return relative_data_path; } /// create directory monitors for each existing subdirectory - void createDirectoryMonitors(const DiskPtr & disk); - /// 
ensure directory monitor thread and connectoin pool creation by disk and subdirectory name - StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const DiskPtr & disk, const std::string & name, bool startup); + void initializeDirectoryQueuesForDisk(const DiskPtr & disk); + /// ensure directory queue thread and connection pool created by disk and subdirectory name + DistributedAsyncInsertDirectoryQueue & getDirectoryQueue(const DiskPtr & disk, const std::string & name, bool startup); /// Return list of metrics for all created monitors /// (note that monitors are created lazily, i.e. until at least one INSERT executed) /// /// Used by StorageSystemDistributionQueue - std::vector getDirectoryMonitorsStatuses() const; + std::vector getDirectoryQueueStatuses() const; static IColumn::Selector createSelector(ClusterPtr cluster, const ColumnWithTypeAndName & result); /// Apply the following settings: @@ -248,7 +248,7 @@ private: struct ClusterNodeData { - std::shared_ptr directory_monitor; + std::shared_ptr directory_monitor; ConnectionPoolPtr connection_pool; }; std::unordered_map cluster_nodes_data; diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index 34cff7df65d..d57269f0638 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -174,7 +173,7 @@ void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, Cont auto & distributed_table = dynamic_cast(*tables[database][table]); - for (const auto & status : distributed_table.getDirectoryMonitorsStatuses()) + for (const auto & status : distributed_table.getDirectoryQueueStatuses()) { size_t col_num = 0; res_columns[col_num++]->insert(database); diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index 8cbffc10e5a..bbaf2b68418 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -7,7 +7,6 @@ #include #include -#include #include #include diff --git a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference index dde07d4540d..81a1f62ca29 100644 --- a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference +++ b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference @@ -1,7 +1,7 @@ {"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const DB::Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} {"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const DB::Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} -{"operation_name":"void DB::StorageDistributedDirectoryMonitor::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} -{"operation_name":"void DB::StorageDistributedDirectoryMonitor::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} 
+{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} {"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} {"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} {"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} From e10fb142fd124455b12d0b359c9866f0349ef26c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 20:37:12 +0100 Subject: [PATCH 115/333] Fix race for distributed sends from disk Before it was initialized from disk only on startup, but if some INSERT can create the object before, then, it will lead to the situation when it will not be initialized. Signed-off-by: Azat Khuzhin --- .../DistributedAsyncInsertDirectoryQueue.cpp | 6 +-- .../DistributedAsyncInsertDirectoryQueue.h | 3 +- src/Storages/Distributed/DistributedSink.cpp | 38 +++++++++---------- src/Storages/StorageDistributed.cpp | 7 ++-- src/Storages/StorageDistributed.h | 8 +++- 5 files changed, 30 insertions(+), 32 deletions(-) diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 2d25c1822d8..3993e9065d5 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -118,8 +118,7 @@ DistributedAsyncInsertDirectoryQueue::DistributedAsyncInsertDirectoryQueue( const std::string & relative_path_, ConnectionPoolPtr pool_, ActionBlocker & monitor_blocker_, - BackgroundSchedulePool & bg_pool, - bool initialize_from_disk) + BackgroundSchedulePool & bg_pool) : storage(storage_) , pool(std::move(pool_)) , disk(disk_) @@ -144,8 +143,7 @@ DistributedAsyncInsertDirectoryQueue::DistributedAsyncInsertDirectoryQueue( { fs::create_directory(broken_path); - if (initialize_from_disk) - initializeFilesFromDisk(); + initializeFilesFromDisk(); task_handle = bg_pool.createTask(getLoggerName() + "/Bg", [this]{ run(); }); task_handle->activateAndSchedule(); diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h index 90e3d563b4b..de8bb813824 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h @@ -54,8 +54,7 @@ public: const std::string & relative_path_, ConnectionPoolPtr pool_, ActionBlocker & monitor_blocker_, - BackgroundSchedulePool & bg_pool, - bool initialize_from_disk); + BackgroundSchedulePool & bg_pool); ~DistributedAsyncInsertDirectoryQueue(); diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 38aa26fbe0c..3c4e493c34a 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -724,8 +724,8 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & 
shard_info, const return guard; }; - std::vector bin_files; - bin_files.reserve(dir_names.size()); + auto sleep_ms = context->getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds(); + size_t file_size; auto it = dir_names.begin(); /// on first iteration write block to a temporary directory for subsequent @@ -804,10 +804,16 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const out.sync(); } + file_size = fs::file_size(first_file_tmp_path); + // Create hardlink here to reuse increment number - bin_files.push_back(fs::path(path) / file_name); - createHardLink(first_file_tmp_path, bin_files.back()); - auto dir_sync_guard = make_directory_sync_guard(*it); + auto bin_file = (fs::path(path) / file_name).string(); + auto & directory_queue = storage.getDirectoryQueue(disk, *it); + { + createHardLink(first_file_tmp_path, bin_file); + auto dir_sync_guard = make_directory_sync_guard(*it); + } + directory_queue.addFileAndSchedule(bin_file, file_size, sleep_ms); } ++it; @@ -817,26 +823,18 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const const std::string path(fs::path(disk_path) / (data_path + *it)); fs::create_directory(path); - bin_files.push_back(fs::path(path) / (toString(storage.file_names_increment.get()) + ".bin")); - createHardLink(first_file_tmp_path, bin_files.back()); - auto dir_sync_guard = make_directory_sync_guard(*it); + auto bin_file = (fs::path(path) / (toString(storage.file_names_increment.get()) + ".bin")).string(); + auto & directory_monitor = storage.getDirectoryQueue(disk, *it); + { + createHardLink(first_file_tmp_path, bin_file); + auto dir_sync_guard = make_directory_sync_guard(*it); + } + directory_monitor.addFileAndSchedule(bin_file, file_size, sleep_ms); } - auto file_size = fs::file_size(first_file_tmp_path); /// remove the temporary file, enabling the OS to reclaim inode after all threads /// have removed their corresponding files fs::remove(first_file_tmp_path); - - /// Notify - auto sleep_ms = context->getSettingsRef().distributed_directory_monitor_sleep_time_ms; - for (size_t i = 0; i < dir_names.size(); ++i) - { - const auto & dir_name = dir_names[i]; - const auto & bin_file = bin_files[i]; - - auto & directory_monitor = storage.getDirectoryQueue(disk, dir_name, /* startup= */ false); - directory_monitor.addFileAndSchedule(bin_file, file_size, sleep_ms.totalMilliseconds()); - } } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index d95c703216e..9f674ce3bed 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1225,14 +1225,14 @@ void StorageDistributed::initializeDirectoryQueuesForDisk(const DiskPtr & disk) } else { - getDirectoryQueue(disk, dir_path.filename().string(), /* startup= */ true); + getDirectoryQueue(disk, dir_path.filename().string()); } } } } -DistributedAsyncInsertDirectoryQueue & StorageDistributed::getDirectoryQueue(const DiskPtr & disk, const std::string & name, bool startup) +DistributedAsyncInsertDirectoryQueue & StorageDistributed::getDirectoryQueue(const DiskPtr & disk, const std::string & name) { const std::string & disk_path = disk->getPath(); const std::string key(disk_path + name); @@ -1246,8 +1246,7 @@ DistributedAsyncInsertDirectoryQueue & StorageDistributed::getDirectoryQueue(con *this, disk, relative_data_path + name, node_data.connection_pool, monitors_blocker, - getContext()->getDistributedSchedulePool(), - /* initialize_from_disk= */ startup); + 
getContext()->getDistributedSchedulePool()); } return *node_data.directory_monitor; } diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index df4c16f0f67..3a7fae44708 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -166,8 +166,12 @@ private: /// create directory monitors for each existing subdirectory void initializeDirectoryQueuesForDisk(const DiskPtr & disk); - /// ensure directory queue thread and connection pool created by disk and subdirectory name - DistributedAsyncInsertDirectoryQueue & getDirectoryQueue(const DiskPtr & disk, const std::string & name, bool startup); + + /// Get directory queue thread and connection pool created by disk and subdirectory name + /// + /// Used for the INSERT into Distributed in case of insert_distributed_sync==1, from DistributedSink. + DistributedAsyncInsertDirectoryQueue & getDirectoryQueue(const DiskPtr & disk, const std::string & name); + /// Return list of metrics for all created monitors /// (note that monitors are created lazily, i.e. until at least one INSERT executed) From e83699a8d3b72de7ab5ca3295dcabc59492a7fc4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 20:51:04 +0100 Subject: [PATCH 116/333] Improve comment for DistributedAsyncInsertDirectoryQueue::initializeFilesFromDisk() Signed-off-by: Azat Khuzhin --- .../Distributed/DistributedAsyncInsertDirectoryQueue.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 3993e9065d5..6530ebc578a 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -335,8 +335,9 @@ void DistributedAsyncInsertDirectoryQueue::addFile(const std::string & file_path void DistributedAsyncInsertDirectoryQueue::initializeFilesFromDisk() { - /// NOTE: This method does not requires to hold status_mutex, hence, no TSA - /// annotations in the header file. + /// NOTE: This method does not requires to hold status_mutex (because this + /// object is not in the list that the caller may iterate over), hence, no + /// TSA annotations in the header file. fs::directory_iterator end; From a76d7b22c1ff11c775986cef17d91c500ca9e7ce Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 20:51:16 +0100 Subject: [PATCH 117/333] Use existing public methods of StorageDistributed in DistributedSink Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DistributedSink.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 3c4e493c34a..14febb1a2b4 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -340,9 +340,9 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si size_t rows = shard_block.rows(); span.addAttribute("clickhouse.shard_num", shard_info.shard_num); - span.addAttribute("clickhouse.cluster", this->storage.cluster_name); - span.addAttribute("clickhouse.distributed", this->storage.getStorageID().getFullNameNotQuoted()); - span.addAttribute("clickhouse.remote", [this]() { return storage.remote_database + "." 
+ storage.remote_table; }); + span.addAttribute("clickhouse.cluster", storage.cluster_name); + span.addAttribute("clickhouse.distributed", storage.getStorageID().getFullNameNotQuoted()); + span.addAttribute("clickhouse.remote", [this]() { return storage.getRemoteDatabaseName() + "." + storage.getRemoteTableName(); }); span.addAttribute("clickhouse.rows", rows); span.addAttribute("clickhouse.bytes", [&shard_block]() { return toString(shard_block.bytes()); }); @@ -476,7 +476,7 @@ void DistributedSink::writeSync(const Block & block) span.addAttribute("clickhouse.start_shard", start); span.addAttribute("clickhouse.end_shard", end); - span.addAttribute("db.statement", this->query_string); + span.addAttribute("db.statement", query_string); if (num_shards > 1) { @@ -659,9 +659,9 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const { OpenTelemetry::SpanHolder span(__PRETTY_FUNCTION__); span.addAttribute("clickhouse.shard_num", shard_info.shard_num); - span.addAttribute("clickhouse.cluster", this->storage.cluster_name); - span.addAttribute("clickhouse.distributed", this->storage.getStorageID().getFullNameNotQuoted()); - span.addAttribute("clickhouse.remote", [this]() { return storage.remote_database + "." + storage.remote_table; }); + span.addAttribute("clickhouse.cluster", storage.cluster_name); + span.addAttribute("clickhouse.distributed", storage.getStorageID().getFullNameNotQuoted()); + span.addAttribute("clickhouse.remote", [this]() { return storage.getRemoteDatabaseName() + "." + storage.getRemoteTableName(); }); span.addAttribute("clickhouse.rows", [&block]() { return toString(block.rows()); }); span.addAttribute("clickhouse.bytes", [&block]() { return toString(block.bytes()); }); @@ -782,9 +782,9 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const } writeVarUInt(shard_info.shard_num, header_buf); - writeStringBinary(this->storage.cluster_name, header_buf); - writeStringBinary(this->storage.getStorageID().getFullNameNotQuoted(), header_buf); - writeStringBinary(this->storage.remote_database + "." + this->storage.remote_table, header_buf); + writeStringBinary(storage.cluster_name, header_buf); + writeStringBinary(storage.getStorageID().getFullNameNotQuoted(), header_buf); + writeStringBinary(storage.getRemoteDatabaseName() + "." 
+ storage.getRemoteTableName(), header_buf); /// Add new fields here, for example: /// writeVarUInt(my_new_data, header_buf); From 00115c6615c311faffc40d24476c1ef51d40312c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 20:54:49 +0100 Subject: [PATCH 118/333] Rename readDistributedAsyncInsertHeader() Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DistributedAsyncInsertBatch.cpp | 4 ++-- .../Distributed/DistributedAsyncInsertDirectoryQueue.cpp | 4 ++-- src/Storages/Distributed/DistributedAsyncInsertHeader.cpp | 2 +- src/Storages/Distributed/DistributedAsyncInsertHeader.h | 4 ++-- src/Storages/Distributed/DistributedAsyncInsertSource.cpp | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index 80350600caa..948b4186ec6 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -201,7 +201,7 @@ void DistributedAsyncInsertBatch::sendBatch() for (const auto & file : files) { ReadBufferFromFile in(file); - const auto & distributed_header = readDistributedAsyncInsertHeader(in, parent.log); + const auto & distributed_header = DistributedAsyncInsertHeader::read(in, parent.log); OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__, distributed_header.client_info.client_trace_context, @@ -240,7 +240,7 @@ void DistributedAsyncInsertBatch::sendSeparateFiles() try { ReadBufferFromFile in(file); - const auto & distributed_header = readDistributedAsyncInsertHeader(in, parent.log); + const auto & distributed_header = DistributedAsyncInsertHeader::read(in, parent.log); // This function is called in a separated thread, so we set up the trace context from the file OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__, diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 6530ebc578a..b58096182ec 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -417,7 +417,7 @@ void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_ CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend}; ReadBufferFromFile in(file_path); - const auto & distributed_header = readDistributedAsyncInsertHeader(in, log); + const auto & distributed_header = DistributedAsyncInsertHeader::read(in, log); thread_trace_context = std::make_unique(__PRETTY_FUNCTION__, distributed_header.client_info.client_trace_context, @@ -563,7 +563,7 @@ void DistributedAsyncInsertDirectoryQueue::processFilesWithBatching() { /// Determine metadata of the current file and check if it is not broken. 
ReadBufferFromFile in{file_path}; - distributed_header = readDistributedAsyncInsertHeader(in, log); + distributed_header = DistributedAsyncInsertHeader::read(in, log); if (distributed_header.rows) { diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp index 19235c91cc6..9584d1eebdf 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes extern const int CHECKSUM_DOESNT_MATCH; } -DistributedAsyncInsertHeader readDistributedAsyncInsertHeader(ReadBufferFromFile & in, Poco::Logger * log) +DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFile & in, Poco::Logger * log) { DistributedAsyncInsertHeader distributed_header; diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.h b/src/Storages/Distributed/DistributedAsyncInsertHeader.h index 2c7a6477b6c..dc048107a47 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.h +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.h @@ -31,8 +31,8 @@ struct DistributedAsyncInsertHeader /// dumpStructure() of the header -- obsolete std::string block_header_string; Block block_header; + + static DistributedAsyncInsertHeader read(ReadBufferFromFile & in, Poco::Logger * log); }; -DistributedAsyncInsertHeader readDistributedAsyncInsertHeader(ReadBufferFromFile & in, Poco::Logger * log); - } diff --git a/src/Storages/Distributed/DistributedAsyncInsertSource.cpp b/src/Storages/Distributed/DistributedAsyncInsertSource.cpp index 782cbf9c026..7992636ac11 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertSource.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertSource.cpp @@ -22,7 +22,7 @@ struct DistributedAsyncInsertSource::Data : log(&Poco::Logger::get("DistributedAsyncInsertSource")) , in(file_name) , decompressing_in(in) - , block_in(decompressing_in, readDistributedAsyncInsertHeader(in, log).revision) + , block_in(decompressing_in, DistributedAsyncInsertHeader::read(in, log).revision) , first_block(block_in.read()) { } From 263c042c6aa6103e5be221fe36c83be593056ac6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 21:01:54 +0100 Subject: [PATCH 119/333] Fix opentelemetry for distributed batch sends Signed-off-by: Azat Khuzhin --- .../DistributedAsyncInsertBatch.cpp | 79 ++++++++++++------- .../DistributedAsyncInsertDirectoryQueue.cpp | 12 +-- .../DistributedAsyncInsertHeader.cpp | 16 ++++ .../DistributedAsyncInsertHeader.h | 7 ++ 4 files changed, 77 insertions(+), 37 deletions(-) diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index 948b4186ec6..10bf9b8d66e 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -198,37 +198,59 @@ void DistributedAsyncInsertBatch::sendBatch() IConnectionPool::Entry connection; - for (const auto & file : files) + /// Since the batch is sent as a whole (in case of failure, the whole batch + /// will be repeated), we need to mark the whole batch as failed in case of + /// error). 
+ std::vector tracing_contexts; + UInt64 batch_start_time = clock_gettime_ns(); + + try { - ReadBufferFromFile in(file); - const auto & distributed_header = DistributedAsyncInsertHeader::read(in, parent.log); - - OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__, - distributed_header.client_info.client_trace_context, - parent.storage.getContext()->getOpenTelemetrySpanLog()); - - if (!remote) + for (const auto & file : files) { - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(distributed_header.insert_settings); - connection = parent.pool->get(timeouts); - compression_expected = connection->getCompression() == Protocol::Compression::Enable; + ReadBufferFromFile in(file); + const auto & distributed_header = DistributedAsyncInsertHeader::read(in, parent.log); - LOG_DEBUG(parent.log, "Sending a batch of {} files to {} ({} rows, {} bytes).", - files.size(), - connection->getDescription(), - formatReadableQuantity(total_rows), - formatReadableSizeWithBinarySuffix(total_bytes)); + tracing_contexts.emplace_back(distributed_header.createTracingContextHolder( + parent.storage.getContext()->getOpenTelemetrySpanLog())); + tracing_contexts.back()->root_span.addAttribute("clickhouse.distributed_batch_start_time", batch_start_time); - remote = std::make_unique(*connection, timeouts, - distributed_header.insert_query, - distributed_header.insert_settings, - distributed_header.client_info); + if (!remote) + { + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(distributed_header.insert_settings); + connection = parent.pool->get(timeouts); + compression_expected = connection->getCompression() == Protocol::Compression::Enable; + + LOG_DEBUG(parent.log, "Sending a batch of {} files to {} ({} rows, {} bytes).", + files.size(), + connection->getDescription(), + formatReadableQuantity(total_rows), + formatReadableSizeWithBinarySuffix(total_bytes)); + + remote = std::make_unique(*connection, timeouts, + distributed_header.insert_query, + distributed_header.insert_settings, + distributed_header.client_info); + } + writeRemoteConvert(distributed_header, *remote, compression_expected, in, parent.log); } - writeRemoteConvert(distributed_header, *remote, compression_expected, in, parent.log); - } - if (remote) - remote->onFinish(); + if (remote) + remote->onFinish(); + } + catch (...) + { + try + { + for (auto & tracing_context : tracing_contexts) + tracing_context->root_span.addAttribute(std::current_exception()); + } + catch (...) 
+ { + tryLogCurrentException(parent.log, "Cannot append exception to tracing context"); + } + throw; + } } void DistributedAsyncInsertBatch::sendSeparateFiles() @@ -237,14 +259,15 @@ void DistributedAsyncInsertBatch::sendSeparateFiles() for (const auto & file : files) { + OpenTelemetry::TracingContextHolderPtr trace_context; + try { ReadBufferFromFile in(file); const auto & distributed_header = DistributedAsyncInsertHeader::read(in, parent.log); // This function is called in a separated thread, so we set up the trace context from the file - OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__, - distributed_header.client_info.client_trace_context, + trace_context = distributed_header.createTracingContextHolder( parent.storage.getContext()->getOpenTelemetrySpanLog()); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(distributed_header.insert_settings); @@ -261,6 +284,8 @@ void DistributedAsyncInsertBatch::sendSeparateFiles() } catch (Exception & e) { + trace_context->root_span.addAttribute(std::current_exception()); + if (isDistributedSendBroken(e.code(), e.isRemoteException())) { parent.markAsBroken(file); diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index b58096182ec..31c621571ab 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -418,16 +418,8 @@ void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_ ReadBufferFromFile in(file_path); const auto & distributed_header = DistributedAsyncInsertHeader::read(in, log); - - thread_trace_context = std::make_unique(__PRETTY_FUNCTION__, - distributed_header.client_info.client_trace_context, - this->storage.getContext()->getOpenTelemetrySpanLog()); - thread_trace_context->root_span.addAttribute("clickhouse.shard_num", distributed_header.shard_num); - thread_trace_context->root_span.addAttribute("clickhouse.cluster", distributed_header.cluster); - thread_trace_context->root_span.addAttribute("clickhouse.distributed", distributed_header.distributed_table); - thread_trace_context->root_span.addAttribute("clickhouse.remote", distributed_header.remote_table); - thread_trace_context->root_span.addAttribute("clickhouse.rows", distributed_header.rows); - thread_trace_context->root_span.addAttribute("clickhouse.bytes", distributed_header.bytes); + thread_trace_context = + distributed_header.createTracingContextHolder(storage.getContext()->getOpenTelemetrySpanLog()); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(distributed_header.insert_settings); auto connection = pool->get(timeouts, &distributed_header.insert_settings); diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp index 9584d1eebdf..b5c16558eb2 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -106,4 +107,19 @@ DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFi return distributed_header; } +OpenTelemetry::TracingContextHolderPtr DistributedAsyncInsertHeader::createTracingContextHolder(std::shared_ptr open_telemetry_span_log) const +{ + OpenTelemetry::TracingContextHolderPtr trace_context = std::make_unique( + __PRETTY_FUNCTION__, + client_info.client_trace_context, 
+ std::move(open_telemetry_span_log)); + trace_context->root_span.addAttribute("clickhouse.shard_num", shard_num); + trace_context->root_span.addAttribute("clickhouse.cluster", cluster); + trace_context->root_span.addAttribute("clickhouse.distributed", distributed_table); + trace_context->root_span.addAttribute("clickhouse.remote", remote_table); + trace_context->root_span.addAttribute("clickhouse.rows", rows); + trace_context->root_span.addAttribute("clickhouse.bytes", bytes); + return trace_context; +} + } diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.h b/src/Storages/Distributed/DistributedAsyncInsertHeader.h index dc048107a47..dfc059d7083 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.h +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.h @@ -11,6 +11,12 @@ namespace DB class ReadBufferFromFile; +namespace OpenTelemetry +{ +struct TracingContextHolder; +using TracingContextHolderPtr = std::unique_ptr; +} + /// Header for the binary files that are stored on disk for async INSERT into Distributed. struct DistributedAsyncInsertHeader { @@ -33,6 +39,7 @@ struct DistributedAsyncInsertHeader Block block_header; static DistributedAsyncInsertHeader read(ReadBufferFromFile & in, Poco::Logger * log); + OpenTelemetry::TracingContextHolderPtr createTracingContextHolder(std::shared_ptr open_telemetry_span_log) const; }; } From 572dd76eae2b9e3090e07c388c6baa142a095a57 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 21:53:01 +0100 Subject: [PATCH 120/333] Extend ConcurrentBoundedQueue to deque Signed-off-by: Azat Khuzhin --- src/Common/ConcurrentBoundedQueue.h | 59 +++++++++++++++++++---------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index fd4a2d5790b..665be69a28f 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -18,7 +18,8 @@ template class ConcurrentBoundedQueue { private: - std::queue queue; + using Container = std::deque; + Container queue; mutable std::mutex queue_mutex; std::condition_variable push_condition; @@ -28,7 +29,7 @@ private: size_t max_fill = 0; - template + template bool emplaceImpl(std::optional timeout_milliseconds, Args &&...args) { { @@ -51,13 +52,17 @@ private: if (is_finished) return false; - queue.emplace(std::forward(args)...); + if constexpr (back) + queue.emplace_back(std::forward(args)...); + else + queue.emplace_front(std::forward(args)...); } pop_condition.notify_one(); return true; } + template bool popImpl(T & x, std::optional timeout_milliseconds) { { @@ -80,8 +85,16 @@ private: if (is_finished && queue.empty()) return false; - detail::moveOrCopyIfThrow(std::move(queue.front()), x); - queue.pop(); + if constexpr (front) + { + detail::moveOrCopyIfThrow(std::move(queue.front()), x); + queue.pop_front(); + } + else + { + detail::moveOrCopyIfThrow(std::move(queue.back()), x); + queue.pop_back(); + } } push_condition.notify_one(); @@ -94,6 +107,12 @@ public: : max_fill(max_fill_) {} + /// Returns false if queue is finished + [[nodiscard]] bool pushFront(const T & x) + { + return emplaceImpl(/* timeout_milliseconds= */ std::nullopt , x); + } + /// Returns false if queue is finished [[nodiscard]] bool push(const T & x) { @@ -109,37 +128,37 @@ public: template [[nodiscard]] bool emplace(Args &&... 
args) { - return emplaceImpl(std::nullopt /* timeout in milliseconds */, std::forward(args...)); - } - - /// Returns false if queue is finished and empty - [[nodiscard]] bool pop(T & x) - { - return popImpl(x, std::nullopt /*timeout in milliseconds*/); + return emplaceImpl(std::nullopt /* timeout in milliseconds */, std::forward(args...)); } /// Returns false if queue is finished or object was not pushed during timeout [[nodiscard]] bool tryPush(const T & x, UInt64 milliseconds = 0) { - return emplaceImpl(milliseconds, x); + return emplaceImpl(milliseconds, x); } [[nodiscard]] bool tryPush(T && x, UInt64 milliseconds = 0) { - return emplaceImpl(milliseconds, std::move(x)); + return emplaceImpl(milliseconds, std::move(x)); } /// Returns false if queue is finished or object was not emplaced during timeout template [[nodiscard]] bool tryEmplace(UInt64 milliseconds, Args &&... args) { - return emplaceImpl(milliseconds, std::forward(args...)); + return emplaceImpl(milliseconds, std::forward(args...)); + } + + /// Returns false if queue is finished and empty + [[nodiscard]] bool pop(T & x) + { + return popImpl(x, std::nullopt /*timeout in milliseconds*/); } /// Returns false if queue is (finished and empty) or (object was not popped during timeout) [[nodiscard]] bool tryPop(T & x, UInt64 milliseconds) { - return popImpl(x, milliseconds); + return popImpl(x, milliseconds); } /// Returns false if queue is empty. @@ -153,7 +172,7 @@ public: return false; detail::moveOrCopyIfThrow(std::move(queue.front()), x); - queue.pop(); + queue.pop_front(); } push_condition.notify_one(); @@ -222,7 +241,7 @@ public: if (is_finished) return; - std::queue empty_queue; + Container empty_queue; queue.swap(empty_queue); } @@ -235,7 +254,7 @@ public: { std::lock_guard lock(queue_mutex); - std::queue empty_queue; + Container empty_queue; queue.swap(empty_queue); is_finished = true; } From 752d27d663afd9b1b7ce384e9b9b84bf9f4e370e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 22:00:57 +0100 Subject: [PATCH 121/333] Fix lossing files during distributed batch send v2: do not suppress exceptions in case of errors Signed-off-by: Azat Khuzhin --- .../DistributedAsyncInsertDirectoryQueue.cpp | 128 ++++++++++-------- 1 file changed, 73 insertions(+), 55 deletions(-) diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 31c621571ab..32d1823b289 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -545,78 +545,96 @@ void DistributedAsyncInsertDirectoryQueue::processFilesWithBatching() std::unordered_map header_to_batch; std::string file_path; - while (pending_files.tryPop(file_path)) + + try { - size_t total_rows = 0; - size_t total_bytes = 0; - Block header; - DistributedAsyncInsertHeader distributed_header; - try + while (pending_files.tryPop(file_path)) { - /// Determine metadata of the current file and check if it is not broken. - ReadBufferFromFile in{file_path}; - distributed_header = DistributedAsyncInsertHeader::read(in, log); - - if (distributed_header.rows) + size_t total_rows = 0; + size_t total_bytes = 0; + Block header; + DistributedAsyncInsertHeader distributed_header; + try { - total_rows += distributed_header.rows; - total_bytes += distributed_header.bytes; - } + /// Determine metadata of the current file and check if it is not broken. 
+ ReadBufferFromFile in{file_path}; + distributed_header = DistributedAsyncInsertHeader::read(in, log); - if (distributed_header.block_header) - header = distributed_header.block_header; - - if (!total_rows || !header) - { - LOG_DEBUG(log, "Processing batch {} with old format (no header/rows)", in.getFileName()); - - CompressedReadBuffer decompressing_in(in); - NativeReader block_in(decompressing_in, distributed_header.revision); - - while (Block block = block_in.read()) + if (distributed_header.rows) { - total_rows += block.rows(); - total_bytes += block.bytes(); + total_rows += distributed_header.rows; + total_bytes += distributed_header.bytes; + } - if (!header) - header = block.cloneEmpty(); + if (distributed_header.block_header) + header = distributed_header.block_header; + + if (!total_rows || !header) + { + LOG_DEBUG(log, "Processing batch {} with old format (no header/rows)", in.getFileName()); + + CompressedReadBuffer decompressing_in(in); + NativeReader block_in(decompressing_in, distributed_header.revision); + + while (Block block = block_in.read()) + { + total_rows += block.rows(); + total_bytes += block.bytes(); + + if (!header) + header = block.cloneEmpty(); + } } } - } - catch (const Exception & e) - { - if (isDistributedSendBroken(e.code(), e.isRemoteException())) + catch (const Exception & e) { - markAsBroken(file_path); - tryLogCurrentException(log, "File is marked broken due to"); - continue; + if (isDistributedSendBroken(e.code(), e.isRemoteException())) + { + markAsBroken(file_path); + tryLogCurrentException(log, "File is marked broken due to"); + continue; + } + else + throw; + } + + BatchHeader batch_header( + std::move(distributed_header.insert_settings), + std::move(distributed_header.insert_query), + std::move(distributed_header.client_info), + std::move(header) + ); + DistributedAsyncInsertBatch & batch = header_to_batch.try_emplace(batch_header, *this).first->second; + + batch.files.push_back(file_path); + batch.total_rows += total_rows; + batch.total_bytes += total_bytes; + + if (batch.isEnoughSize()) + { + batch.send(); } - else - throw; } - BatchHeader batch_header( - std::move(distributed_header.insert_settings), - std::move(distributed_header.insert_query), - std::move(distributed_header.client_info), - std::move(header) - ); - DistributedAsyncInsertBatch & batch = header_to_batch.try_emplace(batch_header, *this).first->second; - - batch.files.push_back(file_path); - batch.total_rows += total_rows; - batch.total_bytes += total_bytes; - - if (batch.isEnoughSize()) + for (auto & kv : header_to_batch) { + DistributedAsyncInsertBatch & batch = kv.second; batch.send(); } } - - for (auto & kv : header_to_batch) + catch (...) { - DistributedAsyncInsertBatch & batch = kv.second; - batch.send(); + /// Revert uncommitted files. 
+ for (const auto & [_, batch] : header_to_batch) + { + for (const auto & file : batch.files) + { + if (!pending_files.pushFront(file)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot re-schedule a file '{}'", file); + } + } + /// Rethrow exception + throw; } { From 16bfef3c8ae2f2af3f8a680ae6f4ed4e871ba6cd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 21 Jan 2023 22:21:48 +0100 Subject: [PATCH 122/333] Fix processing current_batch.txt on init Signed-off-by: Azat Khuzhin --- .../Distributed/DistributedAsyncInsertDirectoryQueue.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 32d1823b289..97aaa8fba54 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -550,6 +550,12 @@ void DistributedAsyncInsertDirectoryQueue::processFilesWithBatching() { while (pending_files.tryPop(file_path)) { + if (!fs::exists(file_path)) + { + LOG_WARNING(log, "File {} does not exists, likely due to current_batch.txt processing", file_path); + continue; + } + size_t total_rows = 0; size_t total_bytes = 0; Block header; From 7063c20b3c4e288397096431c93118f75eea9ca5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 17 Feb 2023 16:43:08 +0100 Subject: [PATCH 123/333] Change noisy "Skipping send data over distributed table." message to test Signed-off-by: Azat Khuzhin --- .../Distributed/DistributedAsyncInsertDirectoryQueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 97aaa8fba54..0e31a759c4f 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -229,7 +229,7 @@ void DistributedAsyncInsertDirectoryQueue::run() } } else - LOG_DEBUG(log, "Skipping send data over distributed table."); + LOG_TEST(log, "Skipping send data over distributed table."); const auto now = std::chrono::system_clock::now(); if (now - last_decrease_time > decrease_error_count_period) From 591fca57f3fd1c072078f386b339157bc42d9ad7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 18 Feb 2023 22:27:45 +0100 Subject: [PATCH 124/333] Fix function names for opentelemetry spans in StorageDistributed Fixes: 02417_opentelemetry_insert_on_distributed_table Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DistributedAsyncInsertBatch.cpp | 2 ++ .../Distributed/DistributedAsyncInsertDirectoryQueue.cpp | 5 +++-- src/Storages/Distributed/DistributedAsyncInsertHeader.cpp | 4 ++-- src/Storages/Distributed/DistributedAsyncInsertHeader.h | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index 10bf9b8d66e..bf410eed6cc 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -212,6 +212,7 @@ void DistributedAsyncInsertBatch::sendBatch() const auto & distributed_header = DistributedAsyncInsertHeader::read(in, parent.log); tracing_contexts.emplace_back(distributed_header.createTracingContextHolder( + __PRETTY_FUNCTION__, parent.storage.getContext()->getOpenTelemetrySpanLog())); 
tracing_contexts.back()->root_span.addAttribute("clickhouse.distributed_batch_start_time", batch_start_time); @@ -268,6 +269,7 @@ void DistributedAsyncInsertBatch::sendSeparateFiles() // This function is called in a separated thread, so we set up the trace context from the file trace_context = distributed_header.createTracingContextHolder( + __PRETTY_FUNCTION__, parent.storage.getContext()->getOpenTelemetrySpanLog()); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(distributed_header.insert_settings); diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 0e31a759c4f..c6f675533c6 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -418,8 +418,9 @@ void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_ ReadBufferFromFile in(file_path); const auto & distributed_header = DistributedAsyncInsertHeader::read(in, log); - thread_trace_context = - distributed_header.createTracingContextHolder(storage.getContext()->getOpenTelemetrySpanLog()); + thread_trace_context = distributed_header.createTracingContextHolder( + __PRETTY_FUNCTION__, + storage.getContext()->getOpenTelemetrySpanLog()); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(distributed_header.insert_settings); auto connection = pool->get(timeouts, &distributed_header.insert_settings); diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp index b5c16558eb2..018c1d863bb 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp @@ -107,10 +107,10 @@ DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFi return distributed_header; } -OpenTelemetry::TracingContextHolderPtr DistributedAsyncInsertHeader::createTracingContextHolder(std::shared_ptr open_telemetry_span_log) const +OpenTelemetry::TracingContextHolderPtr DistributedAsyncInsertHeader::createTracingContextHolder(const char * function, std::shared_ptr open_telemetry_span_log) const { OpenTelemetry::TracingContextHolderPtr trace_context = std::make_unique( - __PRETTY_FUNCTION__, + function, client_info.client_trace_context, std::move(open_telemetry_span_log)); trace_context->root_span.addAttribute("clickhouse.shard_num", shard_num); diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.h b/src/Storages/Distributed/DistributedAsyncInsertHeader.h index dfc059d7083..a7330fa5ef1 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.h +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.h @@ -39,7 +39,7 @@ struct DistributedAsyncInsertHeader Block block_header; static DistributedAsyncInsertHeader read(ReadBufferFromFile & in, Poco::Logger * log); - OpenTelemetry::TracingContextHolderPtr createTracingContextHolder(std::shared_ptr open_telemetry_span_log) const; + OpenTelemetry::TracingContextHolderPtr createTracingContextHolder(const char * function, std::shared_ptr open_telemetry_span_log) const; }; } From d06a4b50d60e6ccb6e8907559abd0a8c5b27edae Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 28 Feb 2023 22:40:54 +0100 Subject: [PATCH 125/333] Latest review fixes (variable naming: s/monitor/queue) Signed-off-by: Azat Khuzhin --- src/Storages/Distributed/DistributedSink.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 14febb1a2b4..1c372e87495 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -705,7 +705,7 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const CompressionCodecPtr compression_codec = CompressionCodecFactory::instance().get(compression_method, compression_level); /// tmp directory is used to ensure atomicity of transactions - /// and keep monitor thread out from reading incomplete data + /// and keep directory queue thread out from reading incomplete data std::string first_file_tmp_path; auto reservation = storage.getStoragePolicy()->reserveAndCheck(block.bytes()); @@ -824,12 +824,12 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const fs::create_directory(path); auto bin_file = (fs::path(path) / (toString(storage.file_names_increment.get()) + ".bin")).string(); - auto & directory_monitor = storage.getDirectoryQueue(disk, *it); + auto & directory_queue = storage.getDirectoryQueue(disk, *it); { createHardLink(first_file_tmp_path, bin_file); auto dir_sync_guard = make_directory_sync_guard(*it); } - directory_monitor.addFileAndSchedule(bin_file, file_size, sleep_ms); + directory_queue.addFileAndSchedule(bin_file, file_size, sleep_ms); } /// remove the temporary file, enabling the OS to reclaim inode after all threads From 99329d868232d9377d7f808763e951e6f15fd71c Mon Sep 17 00:00:00 2001 From: AVMusorin Date: Fri, 10 Feb 2023 13:55:04 +0100 Subject: [PATCH 126/333] Improve tests for Distributed INSERT Signed-off-by: Azat Khuzhin --- .../configs/overrides_1.xml | 2 - .../configs/overrides_2.xml | 2 - .../test.py | 113 +++++++++++------- .../02536_distributed_detach_table.reference | 2 + .../02536_distributed_detach_table.sql | 16 +++ ...ed_loosing_files_after_exception.reference | 16 +++ ...buted_loosing_files_after_exception.sql.j2 | 32 +++++ 7 files changed, 135 insertions(+), 48 deletions(-) create mode 100644 tests/queries/0_stateless/02536_distributed_detach_table.reference create mode 100644 tests/queries/0_stateless/02536_distributed_detach_table.sql create mode 100644 tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.reference create mode 100644 tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.sql.j2 diff --git a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_1.xml b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_1.xml index 397e05e7a60..a79ce3de1fc 100644 --- a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_1.xml +++ b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_1.xml @@ -3,8 +3,6 @@ 0 - - 1 1 diff --git a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_2.xml b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_2.xml index 2ffd5beaf8d..8279fcdbe6d 100644 --- a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_2.xml +++ b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_2.xml @@ -3,8 +3,6 @@ 0 - - 1 0 diff --git a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py 
b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py index a47268b06fd..faa38af6533 100644 --- a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py +++ b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py @@ -18,61 +18,86 @@ node2 = cluster.add_instance( ) +def get_test_settings(): + settings = {"monitor_batch_inserts": [0, 1]} + return [(k, v) for k, values in settings.items() for v in values] + + +def drop_tables(): + tables = ["null_", "dist", "data", "mv", "dist_data"] + query = "\n".join([f"drop table if exists {table};" for table in tables]) + for _, node in cluster.instances.items(): + node.query(query) + + +def create_tables(**dist_settings): + drop_tables() + _settings_values = ",".join([f"{k}={v}" for k, v in dist_settings.items()]) + _settings = f"settings {_settings_values}" if _settings_values else "" + for _, node in cluster.instances.items(): + node.query( + f""" + create table null_ (key Int, value Int) engine=Null(); + create table dist as null_ engine=Distributed(test_cluster, currentDatabase(), null_, key) {_settings}; + create table data (key Int, uniq_values Int) engine=Memory(); + create materialized view mv to data as select key, uniqExact(value) uniq_values from null_ group by key; + system stop distributed sends dist; + + create table dist_data as data engine=Distributed(test_cluster, currentDatabase(), data); + """ + ) + + @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() - - for _, node in cluster.instances.items(): - node.query( - """ - create table null_ (key Int, value Int) engine=Null(); - create table dist as null_ engine=Distributed(test_cluster, currentDatabase(), null_, key); - create table data (key Int, uniq_values Int) engine=Memory(); - create materialized view mv to data as select key, uniqExact(value) uniq_values from null_ group by key; - system stop distributed sends dist; - - create table dist_data as data engine=Distributed(test_cluster, currentDatabase(), data); - """ - ) - yield cluster finally: + drop_tables() cluster.shutdown() def test_distributed_directory_monitor_split_batch_on_failure_OFF(started_cluster): - for i in range(0, 100): - limit = 100e3 - node2.query( - f"insert into dist select number/100, number from system.numbers limit {limit} offset {limit*i}", - settings={ - # max_memory_usage is the limit for the batch on the remote node - # (local query should not be affected since 30MB is enough for 100K rows) - "max_memory_usage": "30Mi", - "max_untracked_memory": "0", - }, - ) - # "Received from" is mandatory, since the exception should be thrown on the remote node. - with pytest.raises( - QueryRuntimeException, - match=r"DB::Exception: Received from.*Memory limit \(for query\) exceeded: .*while pushing to view default\.mv", - ): + for setting, setting_value in get_test_settings(): + create_tables(**{setting: setting_value}) + for i in range(0, 100): + limit = 100e3 + node2.query( + f"insert into dist select number/100, number from system.numbers limit {limit} offset {limit*i}", + settings={ + # max_memory_usage is the limit for the batch on the remote node + # (local query should not be affected since 30MB is enough for 100K rows) + "max_memory_usage": "30Mi", + "max_untracked_memory": "0", + }, + ) + # "Received from" is mandatory, since the exception should be thrown on the remote node. 
+ if setting == "monitor_batch_inserts" and setting_value == 1: + with pytest.raises( + QueryRuntimeException, + match=r"DB::Exception: Received from.*Memory limit \(for query\) exceeded: .*while pushing to view default\.mv", + ): + node2.query("system flush distributed dist") + assert int(node2.query("select count() from dist_data")) == 0 + continue node2.query("system flush distributed dist") - assert int(node2.query("select count() from dist_data")) == 0 + assert int(node2.query("select count() from dist_data")) == 100000 def test_distributed_directory_monitor_split_batch_on_failure_ON(started_cluster): - for i in range(0, 100): - limit = 100e3 - node1.query( - f"insert into dist select number/100, number from system.numbers limit {limit} offset {limit*i}", - settings={ - # max_memory_usage is the limit for the batch on the remote node - # (local query should not be affected since 30MB is enough for 100K rows) - "max_memory_usage": "30Mi", - "max_untracked_memory": "0", - }, - ) - node1.query("system flush distributed dist") - assert int(node1.query("select count() from dist_data")) == 100000 + for setting, setting_value in get_test_settings(): + create_tables(**{setting: setting_value}) + for i in range(0, 100): + limit = 100e3 + node1.query( + f"insert into dist select number/100, number from system.numbers limit {limit} offset {limit*i}", + settings={ + # max_memory_usage is the limit for the batch on the remote node + # (local query should not be affected since 30MB is enough for 100K rows) + "max_memory_usage": "30Mi", + "max_untracked_memory": "0", + }, + ) + node1.query("system flush distributed dist") + assert int(node1.query("select count() from dist_data")) == 100000 diff --git a/tests/queries/0_stateless/02536_distributed_detach_table.reference b/tests/queries/0_stateless/02536_distributed_detach_table.reference new file mode 100644 index 00000000000..f09bace4421 --- /dev/null +++ b/tests/queries/0_stateless/02536_distributed_detach_table.reference @@ -0,0 +1,2 @@ +0 0 +10 20 diff --git a/tests/queries/0_stateless/02536_distributed_detach_table.sql b/tests/queries/0_stateless/02536_distributed_detach_table.sql new file mode 100644 index 00000000000..92bee1ee544 --- /dev/null +++ b/tests/queries/0_stateless/02536_distributed_detach_table.sql @@ -0,0 +1,16 @@ +-- test detach distributed table with pending files +CREATE TABLE test_02536 (n Int8) ENGINE=MergeTree() ORDER BY tuple(); +CREATE TABLE test_dist_02536 (n Int8) ENGINE=Distributed(test_cluster_two_shards, currentDatabase(), test_02536, rand()); +SYSTEM STOP DISTRIBUTED SENDS test_dist_02536; + +INSERT INTO test_dist_02536 SELECT number FROM numbers(5) SETTINGS prefer_localhost_replica=0; +SELECT count(n), sum(n) FROM test_dist_02536; -- 0 0 + +DETACH TABLE test_dist_02536; +ATTACH TABLE test_dist_02536; + +SYSTEM FLUSH DISTRIBUTED test_dist_02536; + +SELECT count(n), sum(n) FROM test_dist_02536; -- 10 20 +DROP TABLE test_02536; +DROP TABLE test_dist_02536; diff --git a/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.reference b/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.reference new file mode 100644 index 00000000000..7793e91fcb6 --- /dev/null +++ b/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.reference @@ -0,0 +1,16 @@ +monitor_batch_insert=0 +1 2 +1 0 +-- { echoOn } +SELECT sum(key), count(key) FROM dist; +2 2 +SELECT sum(key), count(key) FROM underlying; +2 2 +monitor_batch_insert=1 +1 2 +1 0 +-- { echoOn } +SELECT sum(key), 
count(key) FROM dist; +2 2 +SELECT sum(key), count(key) FROM underlying; +2 2 diff --git a/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.sql.j2 b/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.sql.j2 new file mode 100644 index 00000000000..4f8cf1ccffe --- /dev/null +++ b/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.sql.j2 @@ -0,0 +1,32 @@ +{% for setting in [0, 1] %} +-- Testing that distributed table doesn't loose file after inserts which contain errors + +SELECT 'monitor_batch_insert={{ setting }}'; + +DROP TABLE IF EXISTS dist; +DROP TABLE IF EXISTS underlying; + +CREATE TABLE dist (key Int) ENGINE=Distributed(test_shard_localhost, currentDatabase(), underlying) SETTINGS monitor_batch_inserts={{ setting }}; +SYSTEM STOP DISTRIBUTED SENDS dist; + +INSERT INTO dist SETTINGS prefer_localhost_replica=0, max_threads=1 VALUES (1); +INSERT INTO dist SETTINGS prefer_localhost_replica=0, max_threads=2 VALUES (1); + +SYSTEM FLUSH DISTRIBUTED dist; -- { serverError UNKNOWN_TABLE } +-- check the second since after using queue it may got lost from it +SYSTEM FLUSH DISTRIBUTED dist; -- { serverError UNKNOWN_TABLE } + +SELECT is_blocked, data_files FROM system.distribution_queue WHERE database = currentDatabase() AND table = 'dist'; + +CREATE TABLE underlying (key Int) ENGINE=Memory(); +SYSTEM FLUSH DISTRIBUTED dist; + +-- all data should be flushed +SELECT is_blocked, data_files FROM system.distribution_queue WHERE database = currentDatabase() AND table = 'dist'; + +-- { echoOn } +SELECT sum(key), count(key) FROM dist; +SELECT sum(key), count(key) FROM underlying; +-- { echoOff } + +{% endfor %} From c797122dbe4fa97e726536cc5b6784e16a11dd44 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 1 Mar 2023 08:45:50 +0000 Subject: [PATCH 127/333] Better --- src/Coordination/KeeperStorage.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index b1f3b44b1e1..33b2a91d8bf 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -238,10 +238,7 @@ void KeeperStorage::Node::shallowCopy(const KeeperStorage::Node & other) void KeeperStorage::Node::recalculateSize() { size_bytes = sizeof(Node); - - for (const auto child_path : children) - size_bytes += sizeof child_path; - + size_bytes += children.size() * sizeof(decltype(children)::value_type); size_bytes += data.size(); } From e1352adced7eb8f5e5201fb12c1634903ade6a59 Mon Sep 17 00:00:00 2001 From: artem-yadr <84010375+artem-yadr@users.noreply.github.com> Date: Wed, 1 Mar 2023 12:50:03 +0300 Subject: [PATCH 128/333] Update MongoDBDictionarySource.cpp --- src/Dictionaries/MongoDBDictionarySource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index f61efcab026..a9555a94304 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -118,7 +118,7 @@ MongoDBDictionarySource::MongoDBDictionarySource( Poco::MongoDB::Connection::SocketFactory socket_factory; connection->connect(uri, socket_factory); - Poco::URI poco_uri(connection.uri()); + Poco::URI poco_uri(connection->uri()); // Parse database from URI. 
This is required for correctness -- the // cursor is created using database name and collection name, so we have From 32c198be45477782ec31fb864894500336216a71 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 1 Mar 2023 12:47:31 +0000 Subject: [PATCH 129/333] trace raft logs --- tests/config/config.d/keeper_port.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index 2066dedfa56..03f80bfbe62 100644 --- a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -10,11 +10,12 @@ 100000 10000 - true + false 240000 1000000000000000 100000 + trace 0 From 98004eec6b69d28ae735e75d263089ed9822e422 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 1 Mar 2023 13:08:24 +0000 Subject: [PATCH 130/333] Fix NOT_IMPLEMENTED error with CROSS JOIN and algorithm = auto --- src/Interpreters/ExpressionAnalyzer.cpp | 1 + src/Planner/PlannerJoins.cpp | 8 +++-- .../0_stateless/00202_cross_join.reference | 30 +++++++++++++++++++ .../queries/0_stateless/00202_cross_join.sql | 6 ++++ 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 2b88ff6a353..67aace815dc 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1089,6 +1089,7 @@ static std::shared_ptr chooseJoinAlgorithm( if (MergeJoin::isSupported(analyzed_join)) return std::make_shared(analyzed_join, right_sample_block); + return std::make_shared(analyzed_join, right_sample_block); } throw Exception(ErrorCodes::NOT_IMPLEMENTED, diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index e1c137ddfb8..2a7bd49d6a3 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -655,7 +655,7 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo return std::make_shared(table_join, right_table_expression_header); } - if (!table_join->oneDisjunct() && !table_join->isEnabledAlgorithm(JoinAlgorithm::HASH)) + if (!table_join->oneDisjunct() && !table_join->isEnabledAlgorithm(JoinAlgorithm::HASH) && !table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section"); /// Direct JOIN with special storages that support key value access. 
For example JOIN with Dictionary @@ -708,7 +708,11 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo } if (table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO)) - return std::make_shared(table_join, right_table_expression_header); + { + if (MergeJoin::isSupported(table_join)) + return std::make_shared(table_join, right_table_expression_header); + return std::make_shared(table_join, right_table_expression_header); + } throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't execute any of specified algorithms for specified strictness/kind and right storage type"); diff --git a/tests/queries/0_stateless/00202_cross_join.reference b/tests/queries/0_stateless/00202_cross_join.reference index 122cf0a6e06..a8db281730a 100644 --- a/tests/queries/0_stateless/00202_cross_join.reference +++ b/tests/queries/0_stateless/00202_cross_join.reference @@ -13,3 +13,33 @@ 2 2 2 3 2 4 +0 0 +0 1 +0 2 +0 3 +0 4 +1 0 +1 1 +1 2 +1 3 +1 4 +2 0 +2 1 +2 2 +2 3 +2 4 +0 0 +0 1 +0 2 +0 3 +0 4 +1 0 +1 1 +1 2 +1 3 +1 4 +2 0 +2 1 +2 2 +2 3 +2 4 diff --git a/tests/queries/0_stateless/00202_cross_join.sql b/tests/queries/0_stateless/00202_cross_join.sql index ed435d90021..8d62c56b3f1 100644 --- a/tests/queries/0_stateless/00202_cross_join.sql +++ b/tests/queries/0_stateless/00202_cross_join.sql @@ -1 +1,7 @@ SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2; + +SET join_algorithm = 'auto'; +SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2; + +SET allow_experimental_analyzer = 1; +SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2; From f2e1d6d4025a3dcabf0f18828803d31252fc1bd9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 1 Mar 2023 13:11:32 +0000 Subject: [PATCH 131/333] Update version_date.tsv and changelogs after v23.2.2.20-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.2.2.20-stable.md | 30 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 3 +++ 5 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.2.2.20-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 09395befdad..532f1531bb8 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -29,7 +29,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.2.2.20" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 472f25eed2d..f56e177bbe9 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.2.2.20" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 5dbb244c298..1372288978d 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.2.2.20" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.2.2.20-stable.md b/docs/changelogs/v23.2.2.20-stable.md new file mode 100644 index 00000000000..60aeaa66cbf --- /dev/null +++ b/docs/changelogs/v23.2.2.20-stable.md @@ -0,0 +1,30 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.2.20-stable (f6c269c8df2) FIXME as compared to v23.2.1.2537-stable (52bf836e03a) + +#### Improvement +* Backported in [#46914](https://github.com/ClickHouse/ClickHouse/issues/46914): Allow PREWHERE for Merge with different DEFAULT expression for column. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#47022](https://github.com/ClickHouse/ClickHouse/issues/47022): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### Bug Fix +* Backported in [#46828](https://github.com/ClickHouse/ClickHouse/issues/46828): Combined PREWHERE column accumulated from multiple PREWHERE in some cases didn't contain 0's from previous steps. The fix is to apply final filter if we know that it wasn't applied from more than 1 last step. [#46785](https://github.com/ClickHouse/ClickHouse/pull/46785) ([Alexander Gololobov](https://github.com/davenger)). + +#### Build/Testing/Packaging Improvement +* Backported in [#47062](https://github.com/ClickHouse/ClickHouse/issues/47062): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#46895](https://github.com/ClickHouse/ClickHouse/issues/46895): Fixed a bug in automatic retries of `DROP TABLE` query with `ReplicatedMergeTree` tables and `Atomic` databases. In rare cases it could lead to `Can't get data for node /zk_path/log_pointer` and `The specified key does not exist` errors if ZooKeeper session expired during DROP and a new replicated table with the same path in ZooKeeper was created in parallel. [#46384](https://github.com/ClickHouse/ClickHouse/pull/46384) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#46865](https://github.com/ClickHouse/ClickHouse/issues/46865): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46958](https://github.com/ClickHouse/ClickHouse/issues/46958): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). 
+ +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* More concise logging at trace level for PREWHERE steps [#46771](https://github.com/ClickHouse/ClickHouse/pull/46771) ([Alexander Gololobov](https://github.com/davenger)). +* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 3814e94bf24..8a25ceb13cf 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,4 +1,6 @@ +v23.2.2.20-stable 2023-03-01 v23.2.1.2537-stable 2023-02-23 +v23.1.4.58-stable 2023-03-01 v23.1.3.5-stable 2023-02-03 v23.1.2.9-stable 2023-01-29 v23.1.1.3077-stable 2023-01-25 @@ -25,6 +27,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.14.53-lts 2023-02-27 v22.8.13.20-lts 2023-01-29 v22.8.12.45-lts 2023-01-10 v22.8.11.15-lts 2022-12-08 From a898dd556fb475020a6a7fcb1fdf58ac43d826f2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 1 Mar 2023 13:12:22 +0000 Subject: [PATCH 132/333] Update version_date.tsv and changelogs after v23.1.4.58-stable --- docs/changelogs/v23.1.4.58-stable.md | 47 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 ++ 2 files changed, 49 insertions(+) create mode 100644 docs/changelogs/v23.1.4.58-stable.md diff --git a/docs/changelogs/v23.1.4.58-stable.md b/docs/changelogs/v23.1.4.58-stable.md new file mode 100644 index 00000000000..d1ffe87f58e --- /dev/null +++ b/docs/changelogs/v23.1.4.58-stable.md @@ -0,0 +1,47 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.1.4.58-stable (9ed562163a5) FIXME as compared to v23.1.3.5-stable (548b494bcce) + +#### Performance Improvement +* Backported in [#46380](https://github.com/ClickHouse/ClickHouse/issues/46380): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Improvement +* Backported in [#46985](https://github.com/ClickHouse/ClickHouse/issues/46985): - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)). +* Backported in [#46778](https://github.com/ClickHouse/ClickHouse/issues/46778): Backward compatibility for T64 codec support for IPv4. [#46747](https://github.com/ClickHouse/ClickHouse/pull/46747) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#47020](https://github.com/ClickHouse/ClickHouse/issues/47020): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### Build/Testing/Packaging Improvement +* Backported in [#46031](https://github.com/ClickHouse/ClickHouse/issues/46031): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Backported in [#46477](https://github.com/ClickHouse/ClickHouse/issues/46477): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46511](https://github.com/ClickHouse/ClickHouse/issues/46511): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#46228](https://github.com/ClickHouse/ClickHouse/issues/46228): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#46967](https://github.com/ClickHouse/ClickHouse/issues/46967): Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#46220](https://github.com/ClickHouse/ClickHouse/issues/46220): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#46751](https://github.com/ClickHouse/ClickHouse/issues/46751): Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#46448](https://github.com/ClickHouse/ClickHouse/issues/46448): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46680](https://github.com/ClickHouse/ClickHouse/issues/46680): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#46873](https://github.com/ClickHouse/ClickHouse/issues/46873): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46956](https://github.com/ClickHouse/ClickHouse/issues/46956): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Support DELETE ON CLUSTER [#45786](https://github.com/ClickHouse/ClickHouse/pull/45786) ([Alexander Gololobov](https://github.com/davenger)). +* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Wait for background tasks in ~UploadHelper [#46334](https://github.com/ClickHouse/ClickHouse/pull/46334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 3814e94bf24..6cae29d53a8 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,4 +1,5 @@ v23.2.1.2537-stable 2023-02-23 +v23.1.4.58-stable 2023-03-01 v23.1.3.5-stable 2023-02-03 v23.1.2.9-stable 2023-01-29 v23.1.1.3077-stable 2023-01-25 @@ -25,6 +26,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.14.53-lts 2023-02-27 v22.8.13.20-lts 2023-01-29 v22.8.12.45-lts 2023-01-10 v22.8.11.15-lts 2022-12-08 From 50d033c563c58d5925a19eff01464616fe16ea7f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 1 Mar 2023 13:48:23 +0000 Subject: [PATCH 133/333] Update version_date.tsv and changelogs after v22.12.4.76-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v22.12.4.76-stable.md | 55 +++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 4 ++ 5 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v22.12.4.76-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 09395befdad..532f1531bb8 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -29,7 +29,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.2.2.20" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 472f25eed2d..f56e177bbe9 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.2.2.20" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 5dbb244c298..1372288978d 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.2.2.20" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v22.12.4.76-stable.md b/docs/changelogs/v22.12.4.76-stable.md new file mode 100644 index 00000000000..79569ff841e --- /dev/null +++ b/docs/changelogs/v22.12.4.76-stable.md @@ -0,0 +1,55 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.12.4.76-stable (cb5772db805) FIXME as compared to v22.12.3.5-stable (893de538f02) + +#### Performance Improvement +* Backported in [#45704](https://github.com/ClickHouse/ClickHouse/issues/45704): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. 
[#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46378](https://github.com/ClickHouse/ClickHouse/issues/46378): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Bug Fix +* Backported in [#45672](https://github.com/ClickHouse/ClickHouse/issues/45672): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement +* Backported in [#45200](https://github.com/ClickHouse/ClickHouse/issues/45200): Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46116](https://github.com/ClickHouse/ClickHouse/issues/46116): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46035](https://github.com/ClickHouse/ClickHouse/issues/46035): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46484](https://github.com/ClickHouse/ClickHouse/issues/46484): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46509](https://github.com/ClickHouse/ClickHouse/issues/46509): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#47058](https://github.com/ClickHouse/ClickHouse/issues/47058): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#45904](https://github.com/ClickHouse/ClickHouse/issues/45904): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#45321](https://github.com/ClickHouse/ClickHouse/issues/45321): Fixed a bug in normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#45000](https://github.com/ClickHouse/ClickHouse/issues/45000): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#45553](https://github.com/ClickHouse/ClickHouse/issues/45553): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#46226](https://github.com/ClickHouse/ClickHouse/issues/46226): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unodered_set<>`. I believe I have found the root cause of this, it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has ocurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#46218](https://github.com/ClickHouse/ClickHouse/issues/46218): Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#46446](https://github.com/ClickHouse/ClickHouse/issues/46446): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46678](https://github.com/ClickHouse/ClickHouse/issues/46678): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46872](https://github.com/ClickHouse/ClickHouse/issues/46872): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46954](https://github.com/ClickHouse/ClickHouse/issues/46954): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). 
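The `c-ares` entry above ([#46226]/[#45629]) walks through the underlying mechanism: `poll` returns a negative value not only on real errors but also when a system interrupt arrives, and treating the latter as failure tore down state that the c-ares callback later dereferenced. As an editorial aside, a minimal, hypothetical C++ sketch of the EINTR-retry pattern that description implies (not the actual ClickHouse or c-ares code, just an illustration of the fix class):

```cpp
#include <poll.h>
#include <cerrno>

// Wait on the resolver's file descriptors, but only treat a negative return
// from poll() as an error when errno is not EINTR (i.e. not a mere signal).
int pollWithRetry(pollfd * fds, nfds_t nfds, int timeout_ms)
{
    int rc;
    do
    {
        rc = poll(fds, nfds, timeout_ms);
    } while (rc < 0 && errno == EINTR); // interrupted by a signal: retry instead of aborting

    return rc; // < 0 here means a genuine error; a real implementation would also recompute the remaining timeout
}
```

Retrying on `EINTR` keeps an interrupted wait from being mistaken for a resolver failure, which is the abort-then-segfault chain the entry describes.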
+ +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 3814e94bf24..4cc74097371 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,7 +1,10 @@ +v23.2.2.20-stable 2023-03-01 v23.2.1.2537-stable 2023-02-23 +v23.1.4.58-stable 2023-03-01 v23.1.3.5-stable 2023-02-03 v23.1.2.9-stable 2023-01-29 v23.1.1.3077-stable 2023-01-25 +v22.12.4.76-stable 2023-03-01 v22.12.3.5-stable 2023-01-10 v22.12.2.25-stable 2023-01-06 v22.12.1.1752-stable 2022-12-15 @@ -25,6 +28,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.14.53-lts 2023-02-27 v22.8.13.20-lts 2023-01-29 v22.8.12.45-lts 2023-01-10 v22.8.11.15-lts 2022-12-08 From b11ed5910727f549a68d1a1dba8819505a8949f2 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 1 Mar 2023 15:37:03 +0100 Subject: [PATCH 134/333] remove ps --- .../0_stateless/02434_cancel_insert_when_client_dies.sh | 7 +++---- .../0_stateless/02435_rollback_cancelled_queries.sh | 9 ++++----- tests/queries/shell_config.sh | 3 ++- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh index 2a17095b267..c4d7367ba14 100755 --- a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh +++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh @@ -67,7 +67,7 @@ function thread_cancel if (( RANDOM % 2 )); then SIGNAL="KILL" fi - PID=$(ps -ef | grep "$TEST_MARK" | grep -v grep | awk '{print $2}') + PID=$(grep -Fa "$TEST_MARK" /proc/*/cmdline | grep -Fav grep | grep -Eoa "/proc/[0-9]*/cmdline:" | grep -Eo "[0-9]*" | head -1) if [ ! -z "$PID" ]; then kill -s "$SIGNAL" "$PID"; fi sleep 0.$RANDOM; sleep 0.$RANDOM; @@ -91,8 +91,7 @@ $CLICKHOUSE_CLIENT -q 'select count() from dedup_test' $CLICKHOUSE_CLIENT -q 'system flush logs' -# We have to ignore stderr from thread_cancel, because our CI finds a bug in ps... -# So use this query to check that thread_cancel do something +# Ensure that thread_cancel actually did something $CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and ( message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes') or - message like '%Connection reset by peer%')" + message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%')" diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh index a69e526c1c8..b639cd5ef70 100755 --- a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh @@ -83,7 +83,7 @@ function thread_cancel if (( RANDOM % 2 )); then SIGNAL="KILL" fi - PID=$(ps -ef | grep "$TEST_MARK" | grep -v grep | awk '{print $2}') + PID=$(grep -Fa "$TEST_MARK" /proc/*/cmdline | grep -Fav grep | grep -Eoa "/proc/[0-9]*/cmdline:" | grep -Eo "[0-9]*" | head -1) if [ ! 
-z "$PID" ]; then kill -s "$SIGNAL" "$PID"; fi sleep 0.$RANDOM; done @@ -93,14 +93,13 @@ export -f thread_insert; export -f thread_select; export -f thread_cancel; -TIMEOUT=20 # 5 seconds for each TYPE +TIMEOUT=20 timeout $TIMEOUT bash -c thread_insert & timeout $TIMEOUT bash -c thread_select & timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & wait -wait_for_queries_to_finish $CLICKHOUSE_CLIENT -q 'system flush logs' @@ -110,10 +109,10 @@ insert_data $CLICKHOUSE_CLIENT --implicit_transaction=1 -q 'select throwIf(count() % 1000000 != 0 or count() = 0) from dedup_test' \ || $CLICKHOUSE_CLIENT -q "select name, rows, active, visible, creation_tid, creation_csn from system.parts where database=currentDatabase();" -# We have to ignore stderr from thread_cancel, because our CI finds a bug in ps... -# So use this query to check that thread_cancel do something +# Ensure that thread_cancel actually did something $CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and ( message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes') or message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%')" +wait_for_queries_to_finish 30 $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=0 -q "drop table dedup_test" diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 3006b74d3f9..ffa286a95cd 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -136,12 +136,13 @@ function clickhouse_client_removed_host_parameter() function wait_for_queries_to_finish() { + local max_tries="${1:-20}" # Wait for all queries to finish (query may still be running if thread is killed by timeout) num_tries=0 while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE current_database=currentDatabase() AND query NOT LIKE '%system.processes%'") -ne 0 ]]; do sleep 0.5; num_tries=$((num_tries+1)) - if [ $num_tries -eq 20 ]; then + if [ $num_tries -eq $max_tries ]; then $CLICKHOUSE_CLIENT -q "SELECT * FROM system.processes WHERE current_database=currentDatabase() AND query NOT LIKE '%system.processes%' FORMAT Vertical" break fi From 6ebee202bd398542b2867d455b6b193e87b627cf Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 1 Mar 2023 14:47:15 +0000 Subject: [PATCH 135/333] Flush buffer when no sync --- src/Coordination/Changelog.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 899310dc591..ddedae4fa0f 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -211,9 +211,14 @@ public: void flush() { auto * file_buffer = tryGetFileBuffer(); - /// Fsync file system if needed - if (file_buffer && log_file_settings.force_sync) - file_buffer->sync(); + if (file_buffer) + { + /// Fsync file system if needed + if (log_file_settings.force_sync) + file_buffer->sync(); + else + file_buffer->next(); + } } uint64_t getStartIndex() const From 6ee65fa1dc3bef79cd6989a66999eca04fce32ee Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 1 Mar 2023 16:41:51 +0100 Subject: [PATCH 136/333] fix shellcheck --- .../0_stateless/01169_alter_partition_isolation_stress.sh | 1 + .../queries/0_stateless/01171_mv_select_insert_isolation_long.sh | 1 + tests/queries/0_stateless/01174_select_insert_isolation.sh | 1 + 
tests/queries/shell_config.sh | 1 + 4 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh index a385fc81fe4..508ad05224c 100755 --- a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh +++ b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh @@ -2,6 +2,7 @@ # Tags: long, no-replicated-database, no-ordinary-database # shellcheck disable=SC2015 +# shellcheck disable=SC2119 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index 12b654f4215..199c2b5389f 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash # Tags: long, no-parallel, no-ordinary-database # Test is too heavy, avoid parallel run in Flaky Check +# shellcheck disable=SC2119 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01174_select_insert_isolation.sh b/tests/queries/0_stateless/01174_select_insert_isolation.sh index dc5c1d7a722..29ccfbb1ccb 100755 --- a/tests/queries/0_stateless/01174_select_insert_isolation.sh +++ b/tests/queries/0_stateless/01174_select_insert_isolation.sh @@ -2,6 +2,7 @@ # Tags: long, no-ordinary-database # shellcheck disable=SC2015 +# shellcheck disable=SC2119 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index ffa286a95cd..ef70c82aefc 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# shellcheck disable=SC2120 # Don't check for ODR violation, since we may test shared build with ASAN export ASAN_OPTIONS=detect_odr_violation=0 From 96e7454df7cd2fd7eaaec97084b679b48be8416b Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 1 Mar 2023 16:31:50 +0000 Subject: [PATCH 137/333] Fix test --- tests/queries/0_stateless/00965_logs_level_bugfix.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/00965_logs_level_bugfix.reference b/tests/queries/0_stateless/00965_logs_level_bugfix.reference index 52396b3fe79..affd41b780b 100644 --- a/tests/queries/0_stateless/00965_logs_level_bugfix.reference +++ b/tests/queries/0_stateless/00965_logs_level_bugfix.reference @@ -2,7 +2,6 @@ . . - . - From 6537029cccd57d51a9125f8d56e5d0ac3c7cc5b6 Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Thu, 2 Mar 2023 02:15:09 +0800 Subject: [PATCH 138/333] fix: keeper systemd service file include invalid inline comment --- packages/clickhouse-keeper.service | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/clickhouse-keeper.service b/packages/clickhouse-keeper.service index 2809074c93a..e4ec5bf4ede 100644 --- a/packages/clickhouse-keeper.service +++ b/packages/clickhouse-keeper.service @@ -14,7 +14,8 @@ User=clickhouse Group=clickhouse Restart=always RestartSec=30 -RuntimeDirectory=%p # %p is resolved to the systemd unit name +# %p is resolved to the systemd unit name +RuntimeDirectory=%p ExecStart=/usr/bin/clickhouse-keeper --config=/etc/clickhouse-keeper/keeper_config.xml --pid-file=%t/%p/%p.pid # Minus means that this file is optional. 
EnvironmentFile=-/etc/default/%p From f3e3b916c913fee844ab9b564112289bb12130f9 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 1 Mar 2023 19:53:01 +0100 Subject: [PATCH 139/333] Allow nested custom disks --- src/Disks/getOrCreateDiskFromAST.cpp | 119 ++++++++++++------ src/Disks/getOrCreateDiskFromAST.h | 2 +- src/Parsers/FieldFromAST.cpp | 87 ++++++++----- src/Storages/MergeTree/MergeTreeSettings.cpp | 3 +- .../test_disk_configuration/test.py | 52 ++++++++ ...54_create_table_with_custom_disk.reference | 1 + .../02454_create_table_with_custom_disk.sql | 10 ++ 7 files changed, 204 insertions(+), 70 deletions(-) diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index 997bd2c853f..4479b532fb9 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -6,9 +6,14 @@ #include #include #include +#include +#include +#include #include #include #include +#include +#include namespace DB { @@ -18,48 +23,90 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context) +namespace { - /// We need a unique name for a created custom disk, but it needs to be the same - /// after table is reattached or server is restarted, so take a hash of the disk - /// configuration serialized ast as a disk name suffix. - auto disk_setting_string = serializeAST(function, true); - auto disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX - + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); - - LOG_TRACE( - &Poco::Logger::get("getOrCreateDiskFromDiskAST"), - "Using disk name `{}` for custom disk {}", - disk_name, disk_setting_string); - - auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { - const auto * function_args_expr = assert_cast(function.arguments.get()); - const auto & function_args = function_args_expr->children; - auto config = getDiskConfigurationFromAST(disk_name, function_args, context); - auto disk = DiskFactory::instance().create(disk_name, *config, disk_name, context, disks_map); - /// Mark that disk can be used without storage policy. - disk->markDiskAsCustom(); - return disk; - }); - - if (!result_disk->isRemote()) + std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context) { - static constexpr auto custom_disks_base_dir_in_config = "custom_local_disks_base_directory"; - auto disk_path_expected_prefix = context->getConfigRef().getString(custom_disks_base_dir_in_config, ""); + /// We need a unique name for a created custom disk, but it needs to be the same + /// after table is reattached or server is restarted, so take a hash of the disk + /// configuration serialized ast as a disk name suffix. 
+ auto disk_setting_string = serializeAST(function, true); + auto disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX + + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); - if (disk_path_expected_prefix.empty()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Base path for custom local disks must be defined in config file by `{}`", - custom_disks_base_dir_in_config); + LOG_TRACE( + &Poco::Logger::get("getOrCreateDiskFromDiskAST"), + "Using disk name `{}` for custom disk {}", + disk_name, disk_setting_string); - if (!pathStartsWith(result_disk->getPath(), disk_path_expected_prefix)) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Path of the custom local disk must be inside `{}` directory", - disk_path_expected_prefix); + auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { + const auto * function_args_expr = assert_cast(function.arguments.get()); + const auto & function_args = function_args_expr->children; + auto config = getDiskConfigurationFromAST(disk_name, function_args, context); + auto disk = DiskFactory::instance().create(disk_name, *config, disk_name, context, disks_map); + /// Mark that disk can be used without storage policy. + disk->markDiskAsCustom(); + return disk; + }); + + if (!result_disk->isRemote()) + { + static constexpr auto custom_disks_base_dir_in_config = "custom_local_disks_base_directory"; + auto disk_path_expected_prefix = context->getConfigRef().getString(custom_disks_base_dir_in_config, ""); + + if (disk_path_expected_prefix.empty()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Base path for custom local disks must be defined in config file by `{}`", + custom_disks_base_dir_in_config); + + if (!pathStartsWith(result_disk->getPath(), disk_path_expected_prefix)) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Path of the custom local disk must be inside `{}` directory", + disk_path_expected_prefix); + } + + return disk_name; } + class DiskConfigurationFlattener + { + public: + struct Data + { + ContextPtr context; + }; + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } + + static void visit(ASTPtr & ast, Data & data) + { + if (isDiskFunction(ast)) + { + auto disk_name = getOrCreateDiskFromDiskAST(*ast->as(), data.context); + ast = std::make_shared(disk_name); + } + } + }; + + /// Visits children first. + using FlattenDiskConfigurationVisitor = InDepthNodeVisitor; +} + + +std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context) +{ + if (!isDiskFunction(disk_function)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected a disk function"); + + auto ast = disk_function->clone(); + + FlattenDiskConfigurationVisitor::Data data{context}; + FlattenDiskConfigurationVisitor{data}.visit(ast); + + auto disk_name = assert_cast(*ast).value.get(); + LOG_TRACE(&Poco::Logger::get("getOrCreateDiskFromDiskAST"), "Result disk name: {}", disk_name); return disk_name; } diff --git a/src/Disks/getOrCreateDiskFromAST.h b/src/Disks/getOrCreateDiskFromAST.h index 7c64707b0bd..0195f575278 100644 --- a/src/Disks/getOrCreateDiskFromAST.h +++ b/src/Disks/getOrCreateDiskFromAST.h @@ -13,6 +13,6 @@ class ASTFunction; * add it to DiskSelector by a unique (but always the same for given configuration) disk name * and return this name. 
*/ -std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context); +std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context); } diff --git a/src/Parsers/FieldFromAST.cpp b/src/Parsers/FieldFromAST.cpp index 3cd10c1cf80..cdfd9c627bc 100644 --- a/src/Parsers/FieldFromAST.cpp +++ b/src/Parsers/FieldFromAST.cpp @@ -5,7 +5,10 @@ #include #include #include +#include +#include #include +#include namespace DB @@ -31,42 +34,64 @@ bool FieldFromASTImpl::isSecret() const return isDiskFunction(ast); } +class DiskConfigurationHider +{ +public: + struct Data {}; + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } + + static void visit(ASTPtr & ast, Data &) + { + if (isDiskFunction(ast)) + { + const auto & disk_function = assert_cast(*ast); + const auto * disk_function_args_expr = assert_cast(disk_function.arguments.get()); + const auto & disk_function_args = disk_function_args_expr->children; + + auto is_secret_arg = [](const std::string & arg_name) + { + /// We allow to not hide type of the disk, e.g. disk(type = s3, ...) + /// and also nested disk, e.g. disk(type = cache, disk = disk(type = s3, ...)) + return arg_name != "type" && arg_name != "disk"; + }; + + for (const auto & arg : disk_function_args) + { + auto * setting_function = arg->as(); + if (!setting_function || setting_function->name != "equals") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected equals function"); + + auto * function_args_expr = assert_cast(setting_function->arguments.get()); + if (!function_args_expr) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected arguments"); + + auto & function_args = function_args_expr->children; + if (function_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected non zero number of arguments"); + + auto * key_identifier = function_args[0]->as(); + if (!key_identifier) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected Identifier"); + + const std::string & key = key_identifier->name(); + if (is_secret_arg(key)) + function_args[1] = std::make_shared("[HIDDEN]"); + } + } + } +}; + +/// Visits children first. 
+using HideDiskConfigurationVisitor = InDepthNodeVisitor; + String FieldFromASTImpl::toString(bool show_secrets) const { if (!show_secrets && isDiskFunction(ast)) { auto hidden = ast->clone(); - const auto & disk_function = assert_cast(*hidden); - const auto * disk_function_args_expr = assert_cast(disk_function.arguments.get()); - const auto & disk_function_args = disk_function_args_expr->children; - - auto is_secret_arg = [](const std::string & arg_name) - { - return arg_name != "type"; - }; - - for (const auto & arg : disk_function_args) - { - auto * setting_function = arg->as(); - if (!setting_function || setting_function->name != "equals") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected equals function"); - - auto * function_args_expr = assert_cast(setting_function->arguments.get()); - if (!function_args_expr) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected arguments"); - - auto & function_args = function_args_expr->children; - if (function_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected non zero number of arguments"); - - auto * key_identifier = function_args[0]->as(); - if (!key_identifier) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad format: expected Identifier"); - - const std::string & key = key_identifier->name(); - if (is_secret_arg(key)) - function_args[1] = std::make_shared("[HIDDEN]"); - } + HideDiskConfigurationVisitor::Data data{}; + HideDiskConfigurationVisitor{data}.visit(hidden); return serializeAST(*hidden); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index e951b8f54cf..479e50fdebb 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -64,8 +64,7 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr conte auto ast = dynamic_cast(custom.getImpl()).ast; if (ast && isDiskFunction(ast)) { - const auto & ast_function = assert_cast(*ast); - auto disk_name = getOrCreateDiskFromDiskAST(ast_function, context); + auto disk_name = getOrCreateDiskFromDiskAST(ast, context); LOG_TRACE(&Poco::Logger::get("MergeTreeSettings"), "Created custom disk {}", disk_name); value = disk_name; } diff --git a/tests/integration/test_disk_configuration/test.py b/tests/integration/test_disk_configuration/test.py index 34f8bea219f..96cdb0213bc 100644 --- a/tests/integration/test_disk_configuration/test.py +++ b/tests/integration/test_disk_configuration/test.py @@ -295,6 +295,58 @@ def test_merge_tree_custom_disk_setting(start_cluster): ) +def test_merge_tree_nested_custom_disk_setting(start_cluster): + node = cluster.instances["node1"] + + minio = cluster.minio_client + for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): + minio.remove_object(cluster.minio_bucket, obj.object_name) + assert ( + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + == 0 + ) + + node.query( + """ + DROP TABLE IF EXISTS test; + CREATE TABLE test (a Int32) + ENGINE = MergeTree() order by tuple() + SETTINGS disk = disk( + type=cache, + max_size='1Gi', + path='/var/lib/clickhouse/custom_disk_cache/', + disk=disk( + type=s3, + endpoint='http://minio1:9001/root/data/', + access_key_id='minio', + secret_access_key='minio123')); + """ + ) + + node.query("INSERT INTO test SELECT number FROM numbers(100)") + node.query("SYSTEM DROP FILESYSTEM CACHE") + + # Check cache is filled + assert 0 == int(node.query("SELECT count() FROM system.filesystem_cache")) + 
assert 100 == int(node.query("SELECT count() FROM test")) + node.query("SELECT * FROM test") + assert 0 < int(node.query("SELECT count() FROM system.filesystem_cache")) + + # Check s3 is filled + assert ( + len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) > 0 + ) + + node.restart_clickhouse() + + assert 100 == int(node.query("SELECT count() FROM test")) + + expected = """ + SETTINGS disk = disk(type = cache, max_size = \\'[HIDDEN]\\', path = \\'[HIDDEN]\\', disk = disk(type = s3, endpoint = \\'[HIDDEN]\\' + """ + assert expected.strip() in node.query(f"SHOW CREATE TABLE test").strip() + + def test_merge_tree_setting_override(start_cluster): node = cluster.instances["node3"] assert ( diff --git a/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference b/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference index 1d8610c59c9..a71d52b6f57 100644 --- a/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference +++ b/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference @@ -9,3 +9,4 @@ SETTINGS disk = disk(type = local, path = \'/var/lib/clickhouse/disks/local/\') CREATE TABLE default.test\n(\n `a` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS disk = disk(type = local, path = \'[HIDDEN]\'), index_granularity = 8192 a Int32 200 +CREATE TABLE default.test\n(\n `a` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS disk = disk(type = cache, max_size = \'[HIDDEN]\', path = \'[HIDDEN]\', disk = disk(type = local, path = \'[HIDDEN]\')), index_granularity = 8192 diff --git a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql index 6cb1c0774aa..4909f91ccb1 100644 --- a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql +++ b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql @@ -26,3 +26,13 @@ DESCRIBE TABLE test; INSERT INTO test SELECT number FROM numbers(100); SELECT count() FROM test; + +DROP TABLE test; + +CREATE TABLE test (a Int32) +ENGINE = MergeTree() order by tuple() +SETTINGS disk = disk(type=cache, max_size='1Gi', path='/var/lib/clickhouse/custom_disk_cache/', disk=disk(type=local, path='/var/lib/clickhouse/disks/local/')); + +SHOW CREATE TABLE test; + +DROP TABLE test; From 13d50509c0b56fbef24132e4687122be633c6983 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 1 Mar 2023 20:16:15 +0100 Subject: [PATCH 140/333] Fix style check --- src/Disks/getOrCreateDiskFromAST.cpp | 1 - src/Parsers/FieldFromAST.cpp | 2 -- 2 files changed, 3 deletions(-) diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index 4479b532fb9..9abc4a7d413 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Parsers/FieldFromAST.cpp b/src/Parsers/FieldFromAST.cpp index cdfd9c627bc..b2251599441 100644 --- a/src/Parsers/FieldFromAST.cpp +++ b/src/Parsers/FieldFromAST.cpp @@ -5,8 +5,6 @@ #include #include #include -#include -#include #include #include From cda4ff0df73d9c4bcdd759c0e1dfe6ebe633b82e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 1 Mar 2023 19:54:22 +0000 Subject: [PATCH 141/333] Remove trace logs --- tests/config/config.d/keeper_port.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index 03f80bfbe62..cffd325e968 100644 --- 
a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -9,13 +9,11 @@ 10000 100000 10000 - false 240000 1000000000000000 100000 - trace 0 From e17c0b6bf1e7cfe03e0d3f42a5398aee4ec18edb Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 1 Mar 2023 20:19:51 +0000 Subject: [PATCH 142/333] Review fixes --- src/Planner/PlannerJoinTree.cpp | 30 ++++++++----------- .../02674_trivial_count_analyzer.reference | 5 ++-- .../02674_trivial_count_analyzer.sql | 5 ++-- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 11944b4b71d..26fc2764a64 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1,6 +1,7 @@ #include #include +#include #include @@ -19,6 +20,8 @@ #include #include #include +#include +#include #include #include @@ -27,6 +30,7 @@ #include #include #include +#include #include #include @@ -41,12 +45,8 @@ #include #include -#include -#include #include #include -#include -#include namespace DB { @@ -174,23 +174,17 @@ bool applyTrivialCountIfPossible( if (!storage || storage->hasLightweightDeletedMask()) return false; - if (settings.max_parallel_replicas > 1 || // - settings.allow_experimental_query_deduplication || // - settings.empty_result_for_aggregation_by_empty_set) + if (settings.max_parallel_replicas > 1 || settings.allow_experimental_query_deduplication + || settings.empty_result_for_aggregation_by_empty_set) return false; QueryTreeNodes aggregates = collectAggregateFunctionNodes(query_tree); if (aggregates.size() != 1) return false; - const auto * function_node = typeid_cast(aggregates.front().get()); - if (!function_node) - return false; - - if (!function_node->getAggregateFunction()) - return false; - - const auto * count_func = typeid_cast(function_node->getAggregateFunction().get()); + const auto & function_node = aggregates.front().get()->as(); + chassert(function_node.getAggregateFunction() != nullptr); + const auto * count_func = typeid_cast(function_node.getAggregateFunction().get()); if (!count_func) return false; @@ -226,7 +220,7 @@ bool applyTrivialCountIfPossible( SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); agg_count.set(place, num_rows.value()); - auto column = ColumnAggregateFunction::create(function_node->getAggregateFunction()); + auto column = ColumnAggregateFunction::create(function_node.getAggregateFunction()); column->insertFrom(place); /// get count() argument type @@ -240,7 +234,7 @@ bool applyTrivialCountIfPossible( Block block_with_count{ {std::move(column), - std::make_shared(function_node->getAggregateFunction(), argument_types, Array{}), + std::make_shared(function_node.getAggregateFunction(), argument_types, Array{}), columns_names.front()}}; auto source = std::make_shared(block_with_count); @@ -416,7 +410,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl } } - /// apply trivial_count optimization if possible + /// Apply trivial_count optimization if possible bool is_trivial_count_applied = is_single_table_expression && table_node && select_query_info.has_aggregates && applyTrivialCountIfPossible(query_plan, *table_node, select_query_info.query_tree, planner_context->getQueryContext(), columns_names); diff --git a/tests/queries/0_stateless/02674_trivial_count_analyzer.reference b/tests/queries/0_stateless/02674_trivial_count_analyzer.reference index 2a94fd59d7b..05feadb58a0 100644 --- a/tests/queries/0_stateless/02674_trivial_count_analyzer.reference +++ 
b/tests/queries/0_stateless/02674_trivial_count_analyzer.reference @@ -18,8 +18,7 @@ select count(b) from m3; 2 select count() + 1 from m3; 3 --- drop table m3; - +drop table m3; -- checking queries with FINAL create table replacing_m3(a Int64, b UInt64) Engine=ReplacingMergeTree() order by (a, b); SYSTEM STOP MERGES replacing_m3; @@ -39,10 +38,10 @@ select count(a) from replacing_m3; 4 select count(b) from replacing_m3; 4 -set optimize_trivial_count_query=0; -- FIXME: wrong result for queries with FINAL select count() from replacing_m3 FINAL; 3 select count(a) from replacing_m3 FINAL; 3 select count(b) from replacing_m3 FINAL; 3 +drop table replacing_m3; diff --git a/tests/queries/0_stateless/02674_trivial_count_analyzer.sql b/tests/queries/0_stateless/02674_trivial_count_analyzer.sql index d4a686e6eff..988d1b9ba92 100644 --- a/tests/queries/0_stateless/02674_trivial_count_analyzer.sql +++ b/tests/queries/0_stateless/02674_trivial_count_analyzer.sql @@ -19,7 +19,7 @@ select count(a) from m3; select count(b) from m3; select count() + 1 from m3; --- drop table m3; +drop table m3; -- checking queries with FINAL create table replacing_m3(a Int64, b UInt64) Engine=ReplacingMergeTree() order by (a, b); @@ -38,9 +38,8 @@ select count(*) from replacing_m3; select count(a) from replacing_m3; select count(b) from replacing_m3; -set optimize_trivial_count_query=0; -- FIXME: wrong result for queries with FINAL select count() from replacing_m3 FINAL; select count(a) from replacing_m3 FINAL; select count(b) from replacing_m3 FINAL; --- drop table replacing_m3; +drop table replacing_m3; From 5ab5902f38906254af902d01cc71851f6cb78401 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 1 Mar 2023 21:27:46 +0000 Subject: [PATCH 143/333] Allow control compression in Parquet/ORC/Arrow output formats, support more compression for input formats --- contrib/arrow-cmake/CMakeLists.txt | 20 +++++++++- contrib/orc | 2 +- src/Core/Settings.h | 3 ++ src/Core/SettingsEnums.cpp | 23 +++++++++++- src/Core/SettingsEnums.h | 6 +++ src/Formats/FormatFactory.cpp | 3 ++ src/Formats/FormatSettings.h | 29 +++++++++++++++ .../Formats/Impl/ArrowBlockOutputFormat.cpp | 26 ++++++++++++- .../Formats/Impl/ORCBlockOutputFormat.cpp | 32 +++++++++++++++- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 37 +++++++++++++++++-- 10 files changed, 171 insertions(+), 10 deletions(-) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index ae6f270a768..4181f916d63 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -115,6 +115,13 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A # ARROW_ORC + adapters/orc/CMakefiles set(ORC_SRCS + "${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h" + "${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/Literal.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/TruthValue.cc" "${ORC_SOURCE_SRC_DIR}/Exceptions.cc" "${ORC_SOURCE_SRC_DIR}/OrcFile.cc" "${ORC_SOURCE_SRC_DIR}/Reader.cc" @@ -129,13 +136,20 @@ set(ORC_SRCS "${ORC_SOURCE_SRC_DIR}/MemoryPool.cc" "${ORC_SOURCE_SRC_DIR}/RLE.cc" "${ORC_SOURCE_SRC_DIR}/RLEv1.cc" - "${ORC_SOURCE_SRC_DIR}/RLEv2.cc" + "${ORC_SOURCE_SRC_DIR}/RleDecoderV2.cc" + "${ORC_SOURCE_SRC_DIR}/RleEncoderV2.cc" + "${ORC_SOURCE_SRC_DIR}/RLEV2Util.cc" "${ORC_SOURCE_SRC_DIR}/Statistics.cc" "${ORC_SOURCE_SRC_DIR}/StripeStream.cc" 
"${ORC_SOURCE_SRC_DIR}/Timezone.cc" "${ORC_SOURCE_SRC_DIR}/TypeImpl.cc" "${ORC_SOURCE_SRC_DIR}/Vector.cc" "${ORC_SOURCE_SRC_DIR}/Writer.cc" + "${ORC_SOURCE_SRC_DIR}/Adaptor.cc" + "${ORC_SOURCE_SRC_DIR}/BloomFilter.cc" + "${ORC_SOURCE_SRC_DIR}/Murmur3.cc" + "${ORC_SOURCE_SRC_DIR}/BlockBuffer.cc" + "${ORC_SOURCE_SRC_DIR}/wrap/orc-proto-wrapper.cc" "${ORC_SOURCE_SRC_DIR}/io/InputStream.cc" "${ORC_SOURCE_SRC_DIR}/io/OutputStream.cc" "${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc" @@ -358,6 +372,9 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS}) add_definitions(-DARROW_WITH_ZSTD) SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS}) +add_definitions(-DARROW_WITH_BROTLI) +SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS}) + add_library(_arrow ${ARROW_SRCS}) @@ -372,6 +389,7 @@ target_link_libraries(_arrow PRIVATE ch_contrib::snappy ch_contrib::zlib ch_contrib::zstd + ch_contrib::brotli ) target_link_libraries(_arrow PUBLIC _orc) diff --git a/contrib/orc b/contrib/orc index f9a393ed243..c5d7755ba0b 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit f9a393ed2433a60034795284f82d093b348f2102 +Subproject commit c5d7755ba0b9a95631c8daea4d094101f26ec761 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3908254b6f1..8d3e787f42c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -857,6 +857,7 @@ class IColumn; M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \ M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \ M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \ + M(ParquetCompression, output_format_parquet_compression_method, "snappy", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ @@ -899,8 +900,10 @@ class IColumn; M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ M(Bool, output_format_arrow_string_as_string, false, "Use Arrow String type instead of Binary for String columns", 0) \ M(Bool, output_format_arrow_fixed_string_as_fixed_byte_array, true, "Use Arrow FIXED_SIZE_BINARY type instead of Binary for FixedString columns.", 0) \ + M(ArrowCompression, output_format_arrow_compression_method, "none", "Compression method for Arrow output format. Supported codecs: lz4_frame, zstd, none (uncompressed)", 0) \ \ M(Bool, output_format_orc_string_as_string, false, "Use ORC String type instead of Binary for String columns", 0) \ + M(ORCCompression, output_format_orc_compression_method, "none", "Compression method for ORC output format. 
Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \ \ M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \ \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 9e1ab585bb0..91572aa1b3f 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -158,7 +158,7 @@ IMPLEMENT_SETTING_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS, {"XML", FormatSettings::EscapingRule::XML}, {"Raw", FormatSettings::EscapingRule::Raw}}) -IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, +IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS, {{"bin", FormatSettings::MsgPackUUIDRepresentation::BIN}, {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) @@ -172,11 +172,30 @@ IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS, {"pread", LocalFSReadMethod::pread}, {"read", LocalFSReadMethod::read}}) - IMPLEMENT_SETTING_ENUM_WITH_RENAME(ParquetVersion, ErrorCodes::BAD_ARGUMENTS, {{"1.0", FormatSettings::ParquetVersion::V1_0}, {"2.4", FormatSettings::ParquetVersion::V2_4}, {"2.6", FormatSettings::ParquetVersion::V2_6}, {"2.latest", FormatSettings::ParquetVersion::V2_LATEST}}) +IMPLEMENT_SETTING_ENUM(ParquetCompression, ErrorCodes::BAD_ARGUMENTS, + {{"none", FormatSettings::ParquetCompression::NONE}, + {"snappy", FormatSettings::ParquetCompression::SNAPPY}, + {"zstd", FormatSettings::ParquetCompression::ZSTD}, + {"gzip", FormatSettings::ParquetCompression::GZIP}, + {"lz4", FormatSettings::ParquetCompression::LZ4}, + {"brotli", FormatSettings::ParquetCompression::BROTLI}}) + +IMPLEMENT_SETTING_ENUM(ArrowCompression, ErrorCodes::BAD_ARGUMENTS, + {{"none", FormatSettings::ArrowCompression::NONE}, + {"lz4_frame", FormatSettings::ArrowCompression::LZ4_FRAME}, + {"zstd", FormatSettings::ArrowCompression::ZSTD}}) + +IMPLEMENT_SETTING_ENUM(ORCCompression, ErrorCodes::BAD_ARGUMENTS, + {{"none", FormatSettings::ORCCompression::NONE}, + {"snappy", FormatSettings::ORCCompression::SNAPPY}, + {"zstd", FormatSettings::ORCCompression::ZSTD}, + {"zlib", FormatSettings::ORCCompression::ZLIB}, + {"lz4", FormatSettings::ORCCompression::LZ4}}) + } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 139a04f3a5a..14e952bbd65 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -194,6 +194,12 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule) DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation) +DECLARE_SETTING_ENUM_WITH_RENAME(ParquetCompression, FormatSettings::ParquetCompression) + +DECLARE_SETTING_ENUM_WITH_RENAME(ArrowCompression, FormatSettings::ArrowCompression) + +DECLARE_SETTING_ENUM_WITH_RENAME(ORCCompression, FormatSettings::ORCCompression) + enum class Dialect { clickhouse, diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index aca3166a8c4..7f14810b260 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -118,6 +118,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string; format_settings.parquet.output_fixed_string_as_fixed_byte_array = settings.output_format_parquet_fixed_string_as_fixed_byte_array; format_settings.parquet.max_block_size = 
settings.input_format_parquet_max_block_size; + format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; @@ -158,6 +159,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching; format_settings.arrow.output_string_as_string = settings.output_format_arrow_string_as_string; format_settings.arrow.output_fixed_string_as_fixed_byte_array = settings.output_format_arrow_fixed_string_as_fixed_byte_array; + format_settings.arrow.output_compression_method = settings.output_format_arrow_compression_method; format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; @@ -168,6 +170,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching; format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string; + format_settings.orc.output_compression_method = settings.output_format_orc_compression_method; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index d1755a35c5f..88a5adbc8df 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -86,6 +86,13 @@ struct FormatSettings UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH; + enum class ArrowCompression + { + NONE, + LZ4_FRAME, + ZSTD + }; + struct { UInt64 row_group_size = 1000000; @@ -96,6 +103,7 @@ struct FormatSettings bool case_insensitive_column_matching = false; bool output_string_as_string = false; bool output_fixed_string_as_fixed_byte_array = true; + ArrowCompression output_compression_method = ArrowCompression::NONE; } arrow; struct @@ -183,6 +191,16 @@ struct FormatSettings V2_LATEST, }; + enum class ParquetCompression + { + NONE, + SNAPPY, + ZSTD, + LZ4, + GZIP, + BROTLI, + }; + struct { UInt64 row_group_size = 1000000; @@ -195,6 +213,7 @@ struct FormatSettings bool output_fixed_string_as_fixed_byte_array = true; UInt64 max_block_size = 8192; ParquetVersion output_version; + ParquetCompression output_compression_method = ParquetCompression::SNAPPY; } parquet; struct Pretty @@ -276,6 +295,15 @@ struct FormatSettings bool accurate_types_of_literals = true; } values; + enum class ORCCompression + { + NONE, + LZ4, + SNAPPY, + ZSTD, + ZLIB, + }; + struct { bool import_nested = false; @@ -285,6 +313,7 
@@ struct FormatSettings bool case_insensitive_column_matching = false; std::unordered_set skip_stripes = {}; bool output_string_as_string = false; + ORCCompression output_compression_method = ORCCompression::NONE; } orc; /// For capnProto format we should determine how to diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp index bf0e2448082..ec35c52e37c 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp @@ -8,6 +8,7 @@ #include #include "ArrowBufferedStreams.h" #include "CHColumnToArrowColumn.h" +#include "config.h" namespace DB @@ -17,6 +18,25 @@ namespace ErrorCodes extern const int UNKNOWN_EXCEPTION; } +namespace +{ + +arrow::Compression::type getArrowCompression(FormatSettings::ArrowCompression method) +{ + switch (method) + { + case FormatSettings::ArrowCompression::NONE: + return arrow::Compression::type::UNCOMPRESSED; + case FormatSettings::ArrowCompression::ZSTD: + return arrow::Compression::type::ZSTD; + case FormatSettings::ArrowCompression::LZ4_FRAME: + return arrow::Compression::type::LZ4_FRAME; + } +} + +} + + ArrowBlockOutputFormat::ArrowBlockOutputFormat(WriteBuffer & out_, const Block & header_, bool stream_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_) , stream{stream_} @@ -78,12 +98,14 @@ void ArrowBlockOutputFormat::prepareWriter(const std::shared_ptr { arrow_ostream = std::make_shared(out); arrow::Result> writer_status; + arrow::ipc::IpcWriteOptions options = arrow::ipc::IpcWriteOptions::Defaults(); + options.codec = *arrow::util::Codec::Create(getArrowCompression(format_settings.arrow.output_compression_method)); // TODO: should we use arrow::ipc::IpcOptions::alignment? 
if (stream) - writer_status = arrow::ipc::MakeStreamWriter(arrow_ostream.get(), schema); + writer_status = arrow::ipc::MakeStreamWriter(arrow_ostream.get(), schema, options); else - writer_status = arrow::ipc::MakeFileWriter(arrow_ostream.get(), schema); + writer_status = arrow::ipc::MakeFileWriter(arrow_ostream.get(), schema,options); if (!writer_status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 42c3e178436..ecb7c2fbc92 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -22,12 +22,42 @@ #include #include +#include "config.h" + namespace DB { namespace ErrorCodes { extern const int ILLEGAL_COLUMN; + extern const int NOT_IMPLEMENTED; +} + +namespace +{ + +orc::CompressionKind getORCCompression(FormatSettings::ORCCompression method) +{ + if (method == FormatSettings::ORCCompression::NONE) + return orc::CompressionKind::CompressionKind_NONE; + +#if USE_SNAPPY + if (method == FormatSettings::ORCCompression::SNAPPY) + return orc::CompressionKind::CompressionKind_SNAPPY; +#endif + + if (method == FormatSettings::ORCCompression::ZSTD) + return orc::CompressionKind::CompressionKind_ZSTD; + + if (method == FormatSettings::ORCCompression::LZ4) + return orc::CompressionKind::CompressionKind_LZ4; + + if (method == FormatSettings::ORCCompression::ZLIB) + return orc::CompressionKind::CompressionKind_ZLIB; + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); +} + } ORCOutputStream::ORCOutputStream(WriteBuffer & out_) : out(out_) {} @@ -529,7 +559,7 @@ void ORCBlockOutputFormat::prepareWriter() { const Block & header = getPort(PortKind::Main).getHeader(); schema = orc::createStructType(); - options.setCompression(orc::CompressionKind::CompressionKind_NONE); + options.setCompression(getORCCompression(format_settings.orc.output_compression_method)); size_t columns_count = header.columns(); for (size_t i = 0; i != columns_count; ++i) schema->addStructField(header.safeGetByPosition(i).name, getORCType(recursiveRemoveLowCardinality(data_types[i]))); diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 18c81f8fd6a..742912df980 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -16,6 +16,9 @@ namespace ErrorCodes extern const int UNKNOWN_EXCEPTION; } +namespace +{ + static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & settings) { switch (settings.parquet.output_version) @@ -31,6 +34,36 @@ static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & se } } +parquet::Compression::type getParquetCompression(FormatSettings::ParquetCompression method) +{ + if (method == FormatSettings::ParquetCompression::NONE) + return parquet::Compression::type::UNCOMPRESSED; + +#if USE_SNAPPY + if (method == FormatSettings::ParquetCompression::SNAPPY) + return parquet::Compression::type::SNAPPY; +#endif + +#if USE_BROTLI + if (method == FormatSettings::ParquetCompression::BROTLI) + return parquet::Compression::type::BROTLI; +#endif + + if (method == FormatSettings::ParquetCompression::ZSTD) + return parquet::Compression::type::ZSTD; + + if (method == FormatSettings::ParquetCompression::LZ4) + return parquet::Compression::type::LZ4; + + if (method == 
FormatSettings::ParquetCompression::GZIP) + return parquet::Compression::type::GZIP; + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); +} + + +} + ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_), format_settings{format_settings_} { @@ -60,9 +93,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) parquet::WriterProperties::Builder builder; builder.version(getParquetVersion(format_settings)); -#if USE_SNAPPY - builder.compression(parquet::Compression::SNAPPY); -#endif + builder.compression(getParquetCompression(format_settings.parquet.output_compression_method)); auto props = builder.build(); auto status = parquet::arrow::FileWriter::Open( *arrow_table->schema(), From 4f33e95c96af0b4cf9e1ad9ab3b92d1cfebf72e4 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 1 Mar 2023 21:28:37 +0000 Subject: [PATCH 144/333] Add tests --- ...1_parquet_arrow_orc_compressions.reference | 14 +++++++++++ .../02581_parquet_arrow_orc_compressions.sh | 24 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.reference create mode 100755 tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh diff --git a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.reference b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.reference new file mode 100644 index 00000000000..492b12dba56 --- /dev/null +++ b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.reference @@ -0,0 +1,14 @@ +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 diff --git a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh new file mode 100755 index 00000000000..10850e17bea --- /dev/null +++ b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='lz4'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='snappy'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='brotli'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='gzip'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" + +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='lz4'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='zlib'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='snappy'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" + + +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Arrow settings output_format_arrow_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Arrow settings output_format_arrow_compression_method='lz4_frame'" | $CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Arrow settings output_format_arrow_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table" + From 657afa849e8ab3140ebe3ce21ae7f05ef249a82a Mon Sep 17 00:00:00 2001 From: AndyB Date: Wed, 1 Mar 2023 23:35:09 +0000 Subject: [PATCH 145/333] logger level in test --- tests/integration/test_log_levels_update/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_log_levels_update/test.py b/tests/integration/test_log_levels_update/test.py index 176733cd7cb..842c7914eaa 100644 --- a/tests/integration/test_log_levels_update/test.py +++ b/tests/integration/test_log_levels_update/test.py @@ -10,7 +10,7 @@ node = cluster.add_instance( config = """ - information + debug /var/log/clickhouse-server/clickhouse-server.log """ From 566a0e166fe0f51b21e11cb3977ac4f0e5aac6eb Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 2 Mar 2023 00:42:02 +0000 Subject: [PATCH 146/333] preserve uid gid 
if running with sudo --- .../decompressor.cpp | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp index 37fbd043814..5f2a769dcdb 100644 --- a/utils/self-extracting-executable/decompressor.cpp +++ b/utils/self-extracting-executable/decompressor.cpp @@ -168,6 +168,24 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n return 0; } +bool getSudoIDs(uid_t &sudo_uid, uid_t &sudo_gid) +{ + sudo_uid = 0; + sudo_gid = 0; + + if (getuid() || geteuid() || getenv("SUDO_USER") == nullptr || getenv("SUDO_UID") == nullptr || getenv("SUDO_GID") == nullptr) + return false; + + char * str_end; + long id = strtol(getenv("SUDO_UID"), &str_end, 10); + if (*str_end == 0) + sudo_uid = static_cast(id); + id = strtol(getenv("SUDO_GID"), &str_end, 10); + if (*str_end == 0) + sudo_gid = static_cast(id); + + return true; +} /// Read data about files and decomrpess them. int decompressFiles(int input_fd, char * path, char * name, bool & have_compressed_analoge, bool & has_exec, char * decompressed_suffix, uint64_t * decompressed_umask) @@ -220,6 +238,10 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress return 1; } + uid_t sudo_uid = 0; + uid_t sudo_gid = 0; + getSudoIDs(sudo_uid, sudo_gid); + FileData file_info; /// Decompress files with appropriate file names for (size_t i = 0; i < le64toh(metadata.number_of_files); ++i) @@ -319,6 +341,9 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress perror("fsync"); if (0 != close(output_fd)) perror("close"); + + if (sudo_uid && sudo_gid) + chown(file_name, sudo_uid, sudo_gid); } if (0 != munmap(input, info_in.st_size)) @@ -532,6 +557,9 @@ int main(int/* argc*/, char* argv[]) return 1; } + if (uid_t sudo_uid = 0, sudo_gid = 0; getSudoIDs(sudo_uid, sudo_gid)) + chown(static_cast(self), sudo_uid, sudo_gid); + if (has_exec) { #if !defined(OS_DARWIN) && !defined(OS_FREEBSD) From ad4a44df52b6b04ac5977d12aa35b099a792133c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 2 Mar 2023 02:59:27 +0100 Subject: [PATCH 147/333] fix --- src/Interpreters/Session.cpp | 32 ++++++++++++++++++- src/Interpreters/Session.h | 3 ++ src/Server/HTTPHandler.cpp | 14 +++++++- .../02435_rollback_cancelled_queries.sh | 12 +++---- 4 files changed, 52 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 7411050aa2d..70d4c0e6ae0 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -140,6 +140,23 @@ public: scheduleCloseSession(session, lock); } + void closeSession(const UUID & user_id, const String & session_id) + { + std::unique_lock lock(mutex); + Key key{user_id, session_id}; + auto it = sessions.find(key); + if (it == sessions.end()) + { + LOG_INFO(log, "Session {} not found for user {}, probably it's already closed", session_id, user_id); + return; + } + + if (!it->second.unique()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot close session {} with refcount {}", session_id, it->second.use_count()); + + sessions.erase(it); + } + private: class SessionKeyHash { @@ -408,7 +425,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: std::shared_ptr new_named_session; bool new_named_session_created = false; std::tie(new_named_session, new_named_session_created) - = NamedSessionsStorage::instance().acquireSession(global_context, user_id.value_or(UUID{}), 
session_name_, timeout_, session_check_); + = NamedSessionsStorage::instance().acquireSession(global_context, *user_id, session_name_, timeout_, session_check_); auto new_session_context = new_named_session->context; new_session_context->makeSessionContext(); @@ -533,5 +550,18 @@ void Session::releaseSessionID() named_session = nullptr; } +void Session::closeSession(const String & session_id) +{ + if (!user_id) /// User was not authenticated + return; + + /// named_session may be not set due to an early exception + if (!named_session) + return; + + releaseSessionID(); + NamedSessionsStorage::instance().closeSession(*user_id, session_id); +} + } diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 0f17c378915..443867806d6 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -77,6 +77,9 @@ public: /// Releases the currently used session ID so it becomes available for reuse by another session. void releaseSessionID(); + /// Closes and removes session + void closeSession(const String & session_id); + private: std::shared_ptr getSessionLog() const; ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 702743ef1f0..f468167f782 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -678,7 +679,7 @@ void HTTPHandler::processQuery( std::unique_ptr in; static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace", - "buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version"}; + "buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version", "close_session"}; Names reserved_param_suffixes; @@ -957,6 +958,14 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse /// In case of exception, send stack trace to client. bool with_stacktrace = false; + /// Close http session (if any) after processing the request + bool close_session = false; + String session_id; + + SCOPE_EXIT_SAFE({ + if (close_session && !session_id.empty()) + session->closeSession(session_id); + }); OpenTelemetry::TracingContextHolderPtr thread_trace_context; SCOPE_EXIT({ @@ -1006,6 +1015,9 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse HTMLForm params(default_settings, request); with_stacktrace = params.getParsed("stacktrace", false); + close_session = params.getParsed("close_session", false); + if (close_session) + session_id = params.get("session_id"); /// FIXME: maybe this check is already unnecessary. /// Workaround. Poco does not detect 411 Length Required case. 
diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh index b639cd5ef70..7c7ef037e02 100755 --- a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh @@ -16,22 +16,20 @@ $CLICKHOUSE_CLIENT -q 'create table dedup_test(A Int64) Engine = MergeTree order function insert_data { IMPLICIT=$(( RANDOM % 2 )) - SESSION_ID="${SESSION}_$RANDOM$RANDOM$RANDOM" - TXN_SETTINGS="session_id=$SESSION_ID&throw_on_unsupported_query_inside_transaction=0" + SESSION_ID="${SESSION}_$RANDOM.$RANDOM.$RANDOM" + TXN_SETTINGS="session_id=$SESSION_ID&throw_on_unsupported_query_inside_transaction=0&implicit_transaction=$IMPLICIT" BEGIN="" COMMIT="" SETTINGS="query_id=$ID&$TXN_SETTINGS&max_insert_block_size=110000&min_insert_block_size_rows=110000" if [[ "$IMPLICIT" -eq 0 ]]; then $CLICKHOUSE_CURL -sS -d 'begin transaction' "$CLICKHOUSE_URL&$TXN_SETTINGS" + SETTINGS="$SETTINGS&session_check=1" BEGIN="begin transaction;" COMMIT=$(echo -ne "\n\ncommit") - else - TXN_SETTINGS="$TXN_SETTINGS&implicit_transaction=1" fi - SETTINGS="query_id=$ID&$TXN_SETTINGS&max_insert_block_size=110000&min_insert_block_size_rows=110000" # max_block_size=10000, so external table will contain smaller blocks that will be squashed on insert-select (more chances to catch a bug on query cancellation) - TRASH_SETTINGS="query_id=$ID&$TXN_SETTINGS&input_format_parallel_parsing=0&max_threads=1&max_insert_threads=1&max_insert_block_size=110000&max_block_size=10000&min_insert_block_size_bytes=0&min_insert_block_size_rows=110000&max_insert_block_size=110000" + TRASH_SETTINGS="$SETTINGS&input_format_parallel_parsing=0&max_threads=1&max_insert_threads=1&max_block_size=10000&min_insert_block_size_bytes=0" TYPE=$(( RANDOM % 6 )) if [[ "$TYPE" -eq 0 ]]; then @@ -49,7 +47,7 @@ function insert_data fi if [[ "$IMPLICIT" -eq 0 ]]; then - $CLICKHOUSE_CURL -sS -d 'commit' "$CLICKHOUSE_URL&$TXN_SETTINGS" | grep -Faq "Transaction is not in RUNNING state" && $CLICKHOUSE_CURL -sS -d 'rollback' "$CLICKHOUSE_URL&$TXN_SETTINGS" + $CLICKHOUSE_CURL -sS -d 'commit' "$CLICKHOUSE_URL&$TXN_SETTINGS&close_session=1" | grep -Faq "Transaction is not in RUNNING state" && $CLICKHOUSE_CURL -sS -d 'rollback' "$CLICKHOUSE_URL&$TXN_SETTINGS" fi } From a6cf2cdab8e59e8457def1a891fb3a3443918ca3 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 2 Mar 2023 10:36:07 +0000 Subject: [PATCH 148/333] Fix style, add docs --- docs/en/interfaces/formats.md | 3 +++ .../en/operations/settings/settings-formats.md | 18 ++++++++++++++++++ .../Formats/Impl/ArrowBlockOutputFormat.cpp | 2 -- .../Formats/Impl/ORCBlockOutputFormat.cpp | 2 -- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 2 +- .../02581_parquet_arrow_orc_compressions.sh | 1 + 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index b2b2c6d5b1e..1b32de9723f 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1973,6 +1973,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. 
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. - [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`. +- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`. ## Arrow {#data-format-arrow} @@ -2041,6 +2042,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. - [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. - [output_format_arrow_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_fixed_string_as_fixed_byte_array) - use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString columns. Default value - `true`. +- [output_format_arrow_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_compression_method) - compression method used in output Arrow format. Default value - `none`. ## ArrowStream {#data-format-arrow-stream} @@ -2096,6 +2098,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. ### Arrow format settings {#parquet-format-settings} - [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. +- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`. - [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. - [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 3580d83f704..919ebaf562f 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1014,6 +1014,12 @@ Use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString column Enabled by default. 
+### output_format_arrow_compression_method {#output_format_arrow_compression_method} + +Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed) + +Default value: `none`. + ## ORC format settings {#orc-format-settings} ### input_format_orc_import_nested {#input_format_orc_import_nested} @@ -1057,6 +1063,12 @@ Use ORC String type instead of Binary for String columns. Disabled by default. +### output_format_orc_compression_method {#output_format_orc_compression_method} + +Compression method used in output ORC format. Supported codecs: `lz4`, `snappy`, `zlib`, `zstd`, `none` (uncompressed) + +Default value: `none`. + ## Parquet format settings {#parquet-format-settings} ### input_format_parquet_import_nested {#input_format_parquet_import_nested} @@ -1112,6 +1124,12 @@ The version of Parquet format used in output format. Supported versions: `1.0`, Default value: `2.latest`. +### output_format_parquet_compression_method {#output_format_parquet_compression_method} + +Compression method used in output Parquet format. Supported codecs: `snappy`, `lz4`, `brotli`, `zstd`, `gzip`, `none` (uncompressed) + +Default value: `snappy`. + ## Hive format settings {#hive-format-settings} ### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp index ec35c52e37c..c85c0342c8c 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp @@ -8,7 +8,6 @@ #include #include "ArrowBufferedStreams.h" #include "CHColumnToArrowColumn.h" -#include "config.h" namespace DB @@ -36,7 +35,6 @@ arrow::Compression::type getArrowCompression(FormatSettings::ArrowCompression me } - ArrowBlockOutputFormat::ArrowBlockOutputFormat(WriteBuffer & out_, const Block & header_, bool stream_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_) , stream{stream_} diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index ecb7c2fbc92..39cacde94ed 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -22,8 +22,6 @@ #include #include -#include "config.h" - namespace DB { diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 742912df980..cedd8a9c54c 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int UNKNOWN_EXCEPTION; + extern const int NOT_IMPLEMENTED; } namespace @@ -61,7 +62,6 @@ parquet::Compression::type getParquetCompression(FormatSettings::ParquetCompress throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); } - } ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) diff --git a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh index 10850e17bea..89b5147f026 100755 --- a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh +++ b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CURDIR=$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 5ddc9a9eedd1bd26881142e5ce5aacf020ed7983 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 1 Mar 2023 10:37:47 +0000 Subject: [PATCH 149/333] Add binary compatibility check for Aarch64 --- .github/workflows/backport_branches.yml | 40 +++++++++++-- .github/workflows/master.yml | 40 +++++++++++-- .github/workflows/pull_request.yml | 40 +++++++++++-- .github/workflows/release_branches.yml | 40 +++++++++++-- tests/ci/ci_config.py | 5 +- tests/ci/compatibility_check.py | 80 ++++++++++++++++++------- 6 files changed, 207 insertions(+), 38 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 110c06631c7..b14a32127c4 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -79,7 +79,7 @@ jobs: with: name: changed_images path: ${{ runner.temp }}/changed_images.json - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -98,12 +98,43 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (x86)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -741,7 +772,8 @@ jobs: - FunctionalStatefulTestDebug - StressTestTsan - IntegrationTestsRelease - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 runs-on: [self-hosted, style-checker] steps: - name: Check out repository code diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 7e045992dee..e224d6cf5c3 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -110,7 +110,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -129,12 +129,43 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd 
"$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (x86)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -3124,7 +3155,8 @@ jobs: - PerformanceComparisonX86-1 - PerformanceComparisonX86-2 - PerformanceComparisonX86-3 - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 - ASTFuzzerTestDebug - ASTFuzzerTestAsan - ASTFuzzerTestTsan diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 7d410f833c5..ae6cb1d3a83 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -174,7 +174,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -193,12 +193,43 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (x86)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -4792,7 +4823,8 @@ jobs: - UnitTestsMsan - UnitTestsUBsan - 
UnitTestsReleaseClang - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 - IntegrationTestsFlakyCheck - SQLancerTestRelease - SQLancerTestDebug diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 4d2a99c2106..229532efdab 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -71,7 +71,7 @@ jobs: with: name: changed_images path: ${{ runner.temp }}/changed_images.json - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -90,12 +90,43 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (x86)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -1947,7 +1978,8 @@ jobs: - IntegrationTestsTsan1 - IntegrationTestsTsan2 - IntegrationTestsTsan3 - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 runs-on: [self-hosted, style-checker] steps: - name: Check out repository code diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 08cd2d466d0..9df198430d2 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -316,9 +316,12 @@ CI_CONFIG = { "Integration tests flaky check (asan)": { "required_build": "package_asan", }, - "Compatibility check": { + "Compatibility check (x86)": { "required_build": "package_release", }, + "Compatibility check (aarch64)": { + "required_build": "package_aarch64", + }, "Unit tests (release-clang)": { "required_build": "binary_release", }, diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 0bdcf1ba3b4..9f3c1a1ca8b 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -2,6 +2,7 @@ from distutils.version import StrictVersion from typing import List, Tuple +import argparse import logging import os import subprocess @@ -28,9 +29,7 @@ from upload_result_helper import upload_results IMAGE_UBUNTU = "clickhouse/test-old-ubuntu" IMAGE_CENTOS = "clickhouse/test-old-centos" -MAX_GLIBC_VERSION = "2.4" DOWNLOAD_RETRIES_COUNT = 5 -CHECK_NAME = "Compatibility check" def process_os_check(log_path: str) -> TestResult: @@ -43,7 +42,7 @@ def process_os_check(log_path: 
str) -> TestResult: return TestResult(name, "OK") -def process_glibc_check(log_path: str) -> TestResults: +def process_glibc_check(log_path: str, max_glibc_version: str) -> TestResults: test_results = [] # type: TestResults with open(log_path, "r") as log: for line in log: @@ -53,7 +52,7 @@ def process_glibc_check(log_path: str) -> TestResults: _, version = symbol_with_glibc.split("@GLIBC_") if version == "PRIVATE": test_results.append(TestResult(symbol_with_glibc, "FAIL")) - elif StrictVersion(version) > MAX_GLIBC_VERSION: + elif StrictVersion(version) > max_glibc_version: test_results.append(TestResult(symbol_with_glibc, "FAIL")) if not test_results: test_results.append(TestResult("glibc check", "OK")) @@ -61,10 +60,10 @@ def process_glibc_check(log_path: str) -> TestResults: def process_result( - result_folder: str, server_log_folder: str + result_folder: str, server_log_folder: str, max_glibc_version: str ) -> Tuple[str, str, TestResults, List[str]]: glibc_log_path = os.path.join(result_folder, "glibc.log") - test_results = process_glibc_check(glibc_log_path) + test_results = process_glibc_check(glibc_log_path, max_glibc_version) status = "success" description = "Compatibility check passed" @@ -101,13 +100,18 @@ def process_result( return status, description, test_results, result_logs -def get_run_commands( - build_path, result_folder, server_log_folder, image_centos, image_ubuntu -): +def get_run_commands_glibc(build_path, result_folder): return [ f"readelf -s --wide {build_path}/usr/bin/clickhouse | grep '@GLIBC_' > {result_folder}/glibc.log", f"readelf -s --wide {build_path}/usr/bin/clickhouse-odbc-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log", f"readelf -s --wide {build_path}/usr/bin/clickhouse-library-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log", + ] + + +def get_run_commands_distributions( + build_path, result_folder, server_log_folder, image_centos, image_ubuntu +): + return [ f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " f"--volume={build_path}/etc/clickhouse-server:/config " f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04", @@ -117,9 +121,21 @@ def get_run_commands( ] +def parse_args(): + parser = argparse.ArgumentParser("Check compatibility with old distributions") + parser.add_argument("--check-name", required=True) + parser.add_argument("--check-glibc", action="store_true") + parser.add_argument( + "--check-distributions", action="store_true" + ) # currently hardcoded to x86, don't enable for ARM + return parser.parse_args() + + def main(): logging.basicConfig(level=logging.INFO) + args = parse_args() + stopwatch = Stopwatch() temp_path = TEMP_PATH @@ -129,13 +145,11 @@ def main(): gh = Github(get_best_robot_token(), per_page=100) - rerun_helper = RerunHelper(gh, pr_info, CHECK_NAME) + rerun_helper = RerunHelper(gh, pr_info, args.check_name) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) - docker_images = get_images_with_versions(reports_path, [IMAGE_CENTOS, IMAGE_UBUNTU]) - packages_path = os.path.join(temp_path, "packages") if not os.path.exists(packages_path): os.makedirs(packages_path) @@ -145,7 +159,7 @@ def main(): "clickhouse-common-static_" in url or "clickhouse-server_" in url ) - download_builds_filter(CHECK_NAME, reports_path, packages_path, url_filter) + download_builds_filter(args.check_name, reports_path, packages_path, url_filter) for f in 
os.listdir(packages_path): if ".deb" in f: @@ -162,9 +176,24 @@ def main(): if not os.path.exists(result_path): os.makedirs(result_path) - run_commands = get_run_commands( - packages_path, result_path, server_log_path, docker_images[0], docker_images[1] - ) + run_commands = [] + + if args.check_glibc: + check_glibc_commands = get_run_commands_glibc(packages_path, result_path) + run_commands.extend(check_glibc_commands) + + if args.check_distributions: + docker_images = get_images_with_versions( + reports_path, [IMAGE_CENTOS, IMAGE_UBUNTU] + ) + check_distributions_commands = get_run_commands_distributions( + packages_path, + result_path, + server_log_path, + docker_images[0], + docker_images[1], + ) + run_commands.extend(check_distributions_commands) state = "success" for run_command in run_commands: @@ -177,13 +206,22 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + # See https://sourceware.org/glibc/wiki/Glibc%20Timeline + max_glibc_version = "" + if "x86" in args.check_name: + max_glibc_version = "2.4" + elif "aarch64" in args.check_nam: + max_glibc_version = "2.18" # because of build with newer sysroot? + else: + raise Exception("Can't determine max glibc version") + s3_helper = S3Helper() state, description, test_results, additional_logs = process_result( - result_path, server_log_path + result_path, server_log_path, max_glibc_version ) ch_helper = ClickHouseHelper() - mark_flaky_tests(ch_helper, CHECK_NAME, test_results) + mark_flaky_tests(ch_helper, args.check_name, test_results) report_url = upload_results( s3_helper, @@ -191,10 +229,10 @@ def main(): pr_info.sha, test_results, additional_logs, - CHECK_NAME, + args.check_name, ) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url) + post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url) prepared_events = prepare_tests_results_for_clickhouse( pr_info, @@ -203,7 +241,7 @@ def main(): stopwatch.duration_seconds, stopwatch.start_time_str, report_url, - CHECK_NAME, + args.check_name, ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) From 33cafc6e891a692af3d1315706afb8cea95804e2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 2 Mar 2023 11:22:32 +0000 Subject: [PATCH 150/333] Automatic style fix --- tests/ci/compatibility_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 9f3c1a1ca8b..398f3df3deb 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -211,7 +211,7 @@ def main(): if "x86" in args.check_name: max_glibc_version = "2.4" elif "aarch64" in args.check_nam: - max_glibc_version = "2.18" # because of build with newer sysroot? + max_glibc_version = "2.18" # because of build with newer sysroot? 
else: raise Exception("Can't determine max glibc version") From cdc8596f81cd4322fda3a188ffcbb9a4065e8882 Mon Sep 17 00:00:00 2001 From: KevinyhZou Date: Thu, 2 Mar 2023 20:08:39 +0800 Subject: [PATCH 151/333] enable int type in repeat function --- .../functions/string-functions.md | 2 +- src/Functions/repeat.cpp | 29 +++++++++++++------ .../01013_repeat_function.reference | 8 ++--- .../0_stateless/01013_repeat_function.sql | 8 ++--- 4 files changed, 29 insertions(+), 18 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 845be6e04c7..f3c5b20f886 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -330,7 +330,7 @@ repeat(s, n) **Arguments** - `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). -- `n` — The number of times to repeat the string. [UInt](../../sql-reference/data-types/int-uint.md). +- `n` — The number of times to repeat the string. [UInt or Int](../../sql-reference/data-types/int-uint.md). **Returned value** diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index dcd05f373fc..0c323c39969 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -39,13 +39,15 @@ struct RepeatImpl size, max_string_size); } + template static void vectorStrConstRepeat( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, - UInt64 repeat_time) + T repeat_time) { + repeat_time = repeat_time < 0 ? 0 : repeat_time; checkRepeatTime(repeat_time); UInt64 data_size = 0; @@ -77,7 +79,8 @@ struct RepeatImpl res_offsets.assign(offsets); for (UInt64 i = 0; i < col_num.size(); ++i) { - size_t repeated_size = (offsets[i] - offsets[i - 1] - 1) * col_num[i] + 1; + T repeat_time = col_num[i] < 0 ? 0 : col_num[i]; + size_t repeated_size = (offsets[i] - offsets[i - 1] - 1) * repeat_time + 1; checkStringSize(repeated_size); data_size += repeated_size; res_offsets[i] = data_size; @@ -86,7 +89,7 @@ struct RepeatImpl for (UInt64 i = 0; i < col_num.size(); ++i) { - T repeat_time = col_num[i]; + T repeat_time = col_num[i] < 0 ? 0 : col_num[i]; checkRepeatTime(repeat_time); process(data.data() + offsets[i - 1], res_data.data() + res_offsets[i - 1], offsets[i] - offsets[i - 1], repeat_time); } @@ -105,7 +108,8 @@ struct RepeatImpl UInt64 col_size = col_num.size(); for (UInt64 i = 0; i < col_size; ++i) { - size_t repeated_size = str_size * col_num[i] + 1; + T repeat_time = col_num[i] < 0 ? 0 : col_num[i]; + size_t repeated_size = str_size * repeat_time + 1; checkStringSize(repeated_size); data_size += repeated_size; res_offsets[i] = data_size; @@ -113,7 +117,7 @@ struct RepeatImpl res_data.resize(data_size); for (UInt64 i = 0; i < col_size; ++i) { - T repeat_time = col_num[i]; + T repeat_time = col_num[i] < 0 ? 
0 : col_num[i]; checkRepeatTime(repeat_time); process( reinterpret_cast(const_cast(copy_str.data())), @@ -168,7 +172,8 @@ class FunctionRepeat : public IFunction template static bool castType(const IDataType * type, F && f) { - return castTypeToEither(type, std::forward(f)); + return castTypeToEither(type, std::forward(f)); } public: @@ -186,7 +191,7 @@ public: if (!isString(arguments[0])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); - if (!isUnsignedInteger(arguments[1])) + if (!isInteger(arguments[1])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[1]->getName(), getName()); return arguments[0]; @@ -204,9 +209,15 @@ public: { if (const ColumnConst * scale_column_num = checkAndGetColumn(numcolumn.get())) { - UInt64 repeat_time = scale_column_num->getValue(); auto col_res = ColumnString::create(); - RepeatImpl::vectorStrConstRepeat(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), repeat_time); + castType(arguments[1].type.get(), [&](const auto & type) + { + using DataType = std::decay_t; + using T = typename DataType::FieldType; + T repeat_time = scale_column_num->getValue(); + RepeatImpl::vectorStrConstRepeat(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), repeat_time); + return true; + }); return col_res; } else if (castType(arguments[1].type.get(), [&](const auto & type) diff --git a/tests/queries/0_stateless/01013_repeat_function.reference b/tests/queries/0_stateless/01013_repeat_function.reference index 46bb248a99a..ea0dadd524f 100644 --- a/tests/queries/0_stateless/01013_repeat_function.reference +++ b/tests/queries/0_stateless/01013_repeat_function.reference @@ -1,7 +1,7 @@ abcabcabcabcabcabcabcabcabcabc abcabcabc -sdfggsdfgg -xywq + + abcabcabcabcabcabcabcabcabcabcabcabc sdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfgg @@ -20,8 +20,8 @@ sdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfgg xywqxywqxywqxywqxywqxywqxywqxywqxywqxywq plkfplkfplkfplkfplkfplkfplkfplkfplkfplkf abcabcabc -abcabc -abc + + abcabcabcabcabcabcabcabcabcabcabcabc abcabcabcabcabcabcabcabcabcabc diff --git a/tests/queries/0_stateless/01013_repeat_function.sql b/tests/queries/0_stateless/01013_repeat_function.sql index 85b0c16b4ab..b29cc032f28 100644 --- a/tests/queries/0_stateless/01013_repeat_function.sql +++ b/tests/queries/0_stateless/01013_repeat_function.sql @@ -3,20 +3,20 @@ DROP TABLE IF EXISTS defaults; CREATE TABLE defaults ( strings String, - u8 UInt8, + i8 Int8, u16 UInt16, u32 UInt32, u64 UInt64 )ENGINE = Memory(); -INSERT INTO defaults values ('abc', 3, 12, 4, 56) ('sdfgg', 2, 10, 21, 200) ('xywq', 1, 4, 9, 5) ('plkf', 0, 5, 7,77); +INSERT INTO defaults values ('abc', 3, 12, 4, 56) ('sdfgg', -2, 10, 21, 200) ('xywq', -1, 4, 9, 5) ('plkf', 0, 5, 7,77); -SELECT repeat(strings, u8) FROM defaults; +SELECT repeat(strings, i8) FROM defaults; SELECT repeat(strings, u16) FROM defaults; SELECT repeat(strings, u32) from defaults; SELECT repeat(strings, u64) FROM defaults; SELECT repeat(strings, 10) FROM defaults; -SELECT repeat('abc', u8) FROM defaults; +SELECT repeat('abc', i8) FROM defaults; SELECT repeat('abc', u16) FROM defaults; SELECT repeat('abc', u32) FROM defaults; SELECT repeat('abc', u64) FROM defaults; From 10610cbdf475d47af8f301bac415c436404c16dd Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 2 Mar 2023 14:46:20 +0100 Subject: [PATCH 152/333] Fix test --- 
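The behaviour the repeat() change above aims for can be summarized with a short Python sketch (an illustration only, not the C++ implementation): a negative count is clamped to zero, which is why the updated 01013_repeat_function reference now contains empty lines for rows with negative values.

```python
def repeat(s: str, n: int) -> str:
    # Negative repeat counts are treated as zero, mirroring the clamping added above.
    return s * max(n, 0)


assert repeat("abc", 3) == "abcabcabc"
assert repeat("sdfgg", -2) == ""   # rows with negative i8 values become empty strings
```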
src/Disks/getOrCreateDiskFromAST.cpp | 5 ----- src/Parsers/FieldFromAST.cpp | 6 +++--- tests/queries/0_stateless/02344_show_caches.reference | 1 + 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index 9abc4a7d413..637acff7b95 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -33,11 +33,6 @@ namespace auto disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); - LOG_TRACE( - &Poco::Logger::get("getOrCreateDiskFromDiskAST"), - "Using disk name `{}` for custom disk {}", - disk_name, disk_setting_string); - auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { const auto * function_args_expr = assert_cast(function.arguments.get()); const auto & function_args = function_args_expr->children; diff --git a/src/Parsers/FieldFromAST.cpp b/src/Parsers/FieldFromAST.cpp index b2251599441..a81bf45a8be 100644 --- a/src/Parsers/FieldFromAST.cpp +++ b/src/Parsers/FieldFromAST.cpp @@ -32,7 +32,7 @@ bool FieldFromASTImpl::isSecret() const return isDiskFunction(ast); } -class DiskConfigurationHider +class DiskConfigurationMasker { public: struct Data {}; @@ -50,7 +50,7 @@ public: auto is_secret_arg = [](const std::string & arg_name) { /// We allow to not hide type of the disk, e.g. disk(type = s3, ...) - /// and also nested disk, e.g. disk(type = cache, disk = disk(type = s3, ...)) + /// and also nested disk, e.g. disk = 'disk_name' return arg_name != "type" && arg_name != "disk"; }; @@ -81,7 +81,7 @@ public: }; /// Visits children first. -using HideDiskConfigurationVisitor = InDepthNodeVisitor; +using HideDiskConfigurationVisitor = InDepthNodeVisitor; String FieldFromASTImpl::toString(bool show_secrets) const { diff --git a/tests/queries/0_stateless/02344_show_caches.reference b/tests/queries/0_stateless/02344_show_caches.reference index 2ee4f902ba1..b321319a309 100644 --- a/tests/queries/0_stateless/02344_show_caches.reference +++ b/tests/queries/0_stateless/02344_show_caches.reference @@ -10,5 +10,6 @@ local_cache s3_cache_6 s3_cache_small local_cache_2 +__tmp_internal_324081342946782869538999598488311137423 local_cache_3 s3_cache_multi_2 From cf245538b91923a97c617a997e2bc68d01b51d75 Mon Sep 17 00:00:00 2001 From: AndyB Date: Thu, 2 Mar 2023 14:31:38 +0000 Subject: [PATCH 153/333] checker fix --- tests/integration/test_log_levels_update/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_log_levels_update/test.py b/tests/integration/test_log_levels_update/test.py index 842c7914eaa..4b83b6431fc 100644 --- a/tests/integration/test_log_levels_update/test.py +++ b/tests/integration/test_log_levels_update/test.py @@ -63,4 +63,4 @@ def test_log_levels_update(start_cluster): log = get_log(node) assert len(log) > 0 - assert not re.search("(|)", log) + assert not re.search("", log) From fdf8a418f517eeb739aa22e6afb0d7392a212a17 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 1 Mar 2023 20:37:20 +0000 Subject: [PATCH 154/333] better interface --- src/DataTypes/DataTypeTuple.cpp | 14 -------------- src/DataTypes/DataTypeTuple.h | 1 - src/DataTypes/IDataType.cpp | 9 --------- src/DataTypes/IDataType.h | 1 - src/DataTypes/Serializations/SerializationInfo.cpp | 9 +++++++++ src/DataTypes/Serializations/SerializationInfo.h | 2 ++ .../Serializations/SerializationInfoTuple.cpp | 14 ++++++++++++++ 
.../Serializations/SerializationInfoTuple.h | 2 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- 9 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 1e28af3ee54..768f87fe3d4 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -312,20 +312,6 @@ MutableSerializationInfoPtr DataTypeTuple::createSerializationInfo(const Seriali return std::make_shared(std::move(infos), names, settings); } -MutableSerializationInfoPtr DataTypeTuple::cloneSerializationInfo(const SerializationInfo & old_info, const SerializationInfo::Settings & settings) const -{ - const auto & old_info_tuple = assert_cast(old_info); - assert(old_info_tuple.getNumElements() == elems.size()); - - MutableSerializationInfos infos; - infos.reserve(elems.size()); - for (size_t i = 0; i < elems.size(); ++i) - infos.push_back(elems[i]->cloneSerializationInfo(*old_info_tuple.getElementInfo(i), settings)); - - return std::make_shared(std::move(infos), names, settings); -} - - SerializationInfoPtr DataTypeTuple::getSerializationInfo(const IColumn & column) const { if (const auto * column_const = checkAndGetColumn(&column)) diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index 9f3860f78db..152f21015f5 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -57,7 +57,6 @@ public: SerializationPtr doGetDefaultSerialization() const override; SerializationPtr getSerialization(const SerializationInfo & info) const override; MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const override; - MutableSerializationInfoPtr cloneSerializationInfo(const SerializationInfo & old_info, const SerializationInfo::Settings & settings) const override; SerializationInfoPtr getSerializationInfo(const IColumn & column) const override; const DataTypePtr & getElement(size_t i) const { return elems[i]; } diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 5cd0b6f659c..e0612fbbf36 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -191,15 +191,6 @@ MutableSerializationInfoPtr IDataType::createSerializationInfo(const Serializati return std::make_shared(ISerialization::Kind::DEFAULT, settings); } -MutableSerializationInfoPtr IDataType::cloneSerializationInfo(const SerializationInfo & old_info, const SerializationInfo::Settings & settings) const -{ - auto new_kind = old_info.getKind(); - if (new_kind == ISerialization::Kind::SPARSE && !supportsSparseSerialization()) - new_kind = ISerialization::Kind::DEFAULT; - - return std::make_shared(new_kind, settings); -} - SerializationInfoPtr IDataType::getSerializationInfo(const IColumn & column) const { if (const auto * column_const = checkAndGetColumn(&column)) diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 5bd12f84d04..e5bdbeca69e 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -104,7 +104,6 @@ public: Names getSubcolumnNames() const; virtual MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const; - virtual MutableSerializationInfoPtr cloneSerializationInfo(const SerializationInfo & old_info, const SerializationInfo::Settings & settings) const; virtual SerializationInfoPtr getSerializationInfo(const IColumn & column) const; /// TODO: support more types. 
diff --git a/src/DataTypes/Serializations/SerializationInfo.cpp b/src/DataTypes/Serializations/SerializationInfo.cpp index 313fa1fa235..af3330d867d 100644 --- a/src/DataTypes/Serializations/SerializationInfo.cpp +++ b/src/DataTypes/Serializations/SerializationInfo.cpp @@ -97,6 +97,15 @@ MutableSerializationInfoPtr SerializationInfo::clone() const return std::make_shared(kind, settings, data); } +std::shared_ptr SerializationInfo::createWithType(const IDataType & type, const Settings & new_settings) const +{ + auto new_kind = kind; + if (new_kind == ISerialization::Kind::SPARSE && !type.supportsSparseSerialization()) + new_kind = ISerialization::Kind::DEFAULT; + + return std::make_shared(new_kind, new_settings); +} + void SerializationInfo::serialializeKindBinary(WriteBuffer & out) const { writeBinary(static_cast(kind), out); diff --git a/src/DataTypes/Serializations/SerializationInfo.h b/src/DataTypes/Serializations/SerializationInfo.h index 7f73d053f1b..560156980db 100644 --- a/src/DataTypes/Serializations/SerializationInfo.h +++ b/src/DataTypes/Serializations/SerializationInfo.h @@ -8,6 +8,7 @@ namespace DB { +class ReadBuffer; class ReadBuffer; class WriteBuffer; class NamesAndTypesList; @@ -59,6 +60,7 @@ public: virtual void replaceData(const SerializationInfo & other); virtual std::shared_ptr clone() const; + virtual std::shared_ptr createWithType(const IDataType & type, const Settings & new_settings) const; virtual void serialializeKindBinary(WriteBuffer & out) const; virtual void deserializeFromKindsBinary(ReadBuffer & in); diff --git a/src/DataTypes/Serializations/SerializationInfoTuple.cpp b/src/DataTypes/Serializations/SerializationInfoTuple.cpp index 1a9639a1566..5724dd7a5c3 100644 --- a/src/DataTypes/Serializations/SerializationInfoTuple.cpp +++ b/src/DataTypes/Serializations/SerializationInfoTuple.cpp @@ -97,6 +97,20 @@ MutableSerializationInfoPtr SerializationInfoTuple::clone() const return std::make_shared(std::move(elems_cloned), names, settings); } +MutableSerializationInfoPtr SerializationInfoTuple::createWithType(const IDataType & type, const Settings & new_settings) const +{ + const auto & type_tuple = assert_cast(type); + const auto & tuple_elements = type_tuple.getElements(); + assert(elems.size() == tuple_elements.size()); + + MutableSerializationInfos infos; + infos.reserve(elems.size()); + for (size_t i = 0; i < elems.size(); ++i) + infos.push_back(elems[i]->createWithType(*tuple_elements[i], new_settings)); + + return std::make_shared(std::move(infos), names, new_settings); +} + void SerializationInfoTuple::serialializeKindBinary(WriteBuffer & out) const { SerializationInfo::serialializeKindBinary(out); diff --git a/src/DataTypes/Serializations/SerializationInfoTuple.h b/src/DataTypes/Serializations/SerializationInfoTuple.h index ef288948b9a..4bcd14d34cb 100644 --- a/src/DataTypes/Serializations/SerializationInfoTuple.h +++ b/src/DataTypes/Serializations/SerializationInfoTuple.h @@ -19,6 +19,7 @@ public: void replaceData(const SerializationInfo & other) override; MutableSerializationInfoPtr clone() const override; + MutableSerializationInfoPtr createWithType(const IDataType & type, const Settings & new_settings) const override; void serialializeKindBinary(WriteBuffer & out) const override; void deserializeFromKindsBinary(ReadBuffer & in) override; @@ -26,7 +27,6 @@ public: Poco::JSON::Object toJSON() const override; void fromJSON(const Poco::JSON::Object & object) override; - size_t getNumElements() const { return elems.size(); } const MutableSerializationInfoPtr 
& getElementInfo(size_t i) const { return elems[i]; } ISerialization::Kind getElementKind(size_t i) const { return elems[i]->getKind(); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 9420b7bebaf..1239befdd67 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -244,7 +244,7 @@ getColumnsForNewDataPart( continue; } - new_info = new_type->cloneSerializationInfo(*old_info, settings); + new_info = old_info->createWithType(*new_type, settings); new_serialization_infos.emplace(new_name, std::move(new_info)); } From df41a83ddb1f83618ce1ad666545cf6138444df3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 2 Mar 2023 14:49:49 +0000 Subject: [PATCH 155/333] set uid gid to file's original --- .../decompressor.cpp | 30 +++++-------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp index 5f2a769dcdb..39fe552dfac 100644 --- a/utils/self-extracting-executable/decompressor.cpp +++ b/utils/self-extracting-executable/decompressor.cpp @@ -168,23 +168,9 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n return 0; } -bool getSudoIDs(uid_t &sudo_uid, uid_t &sudo_gid) +bool isSudo() { - sudo_uid = 0; - sudo_gid = 0; - - if (getuid() || geteuid() || getenv("SUDO_USER") == nullptr || getenv("SUDO_UID") == nullptr || getenv("SUDO_GID") == nullptr) - return false; - - char * str_end; - long id = strtol(getenv("SUDO_UID"), &str_end, 10); - if (*str_end == 0) - sudo_uid = static_cast(id); - id = strtol(getenv("SUDO_GID"), &str_end, 10); - if (*str_end == 0) - sudo_gid = static_cast(id); - - return true; + return getuid() == 0 && geteuid() == 0 && getenv("SUDO_USER") && getenv("SUDO_UID") && getenv("SUDO_GID"); } /// Read data about files and decomrpess them. @@ -238,9 +224,7 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress return 1; } - uid_t sudo_uid = 0; - uid_t sudo_gid = 0; - getSudoIDs(sudo_uid, sudo_gid); + bool is_sudo = isSudo(); FileData file_info; /// Decompress files with appropriate file names @@ -342,8 +326,8 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress if (0 != close(output_fd)) perror("close"); - if (sudo_uid && sudo_gid) - chown(file_name, sudo_uid, sudo_gid); + if (is_sudo) + chown(file_name, info_in.st_uid, info_in.st_gid); } if (0 != munmap(input, info_in.st_size)) @@ -557,8 +541,8 @@ int main(int/* argc*/, char* argv[]) return 1; } - if (uid_t sudo_uid = 0, sudo_gid = 0; getSudoIDs(sudo_uid, sudo_gid)) - chown(static_cast(self), sudo_uid, sudo_gid); + if (isSudo()) + chown(static_cast(self), input_info.st_uid, input_info.st_gid); if (has_exec) { From 95292ce5c79a8386ec6736e1b3fd934c3a38e02a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Mar 2023 16:26:46 +0000 Subject: [PATCH 156/333] Make 01710_projections more stable. 
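The ownership handling introduced in the decompressor change above can be sketched in Python as follows (file paths and helper names are illustrative): when running under sudo, extracted files are chown'ed to the owner of the original compressed binary rather than to the SUDO_UID/SUDO_GID values.

```python
import os


def is_sudo() -> bool:
    # Mirrors the simplified isSudo() above: root uid/euid plus the SUDO_* variables set.
    return (
        os.getuid() == 0
        and os.geteuid() == 0
        and all(v in os.environ for v in ("SUDO_USER", "SUDO_UID", "SUDO_GID"))
    )


def restore_ownership(compressed_binary: str, extracted_file: str) -> None:
    if is_sudo():
        info = os.stat(compressed_binary)      # take uid/gid from the original file
        os.chown(extracted_file, info.st_uid, info.st_gid)
```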
--- tests/queries/0_stateless/01710_projections.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01710_projections.sql b/tests/queries/0_stateless/01710_projections.sql index cbabd3ec598..146b46aed38 100644 --- a/tests/queries/0_stateless/01710_projections.sql +++ b/tests/queries/0_stateless/01710_projections.sql @@ -1,6 +1,6 @@ drop table if exists projection_test; -create table projection_test (`sum(block_count)` UInt64, domain_alias UInt64 alias length(domain), datetime DateTime, domain LowCardinality(String), x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64), projection p (select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration), count(), sum(block_count) / sum(duration), avg(block_count / duration), sum(buffer_time) / sum(duration), avg(buffer_time / duration), sum(valid_bytes) / sum(total_bytes), sum(completed_bytes) / sum(total_bytes), sum(fixed_bytes) / sum(total_bytes), sum(force_bytes) / sum(total_bytes), sum(valid_bytes) / sum(total_bytes), sum(retry_count) / sum(duration), avg(retry_count / duration), countIf(block_count > 0) / count(), countIf(first_time = 0) / count(), uniqHLL12(x_id), uniqHLL12(y_id) group by dt_m, domain)) engine MergeTree partition by toDate(datetime) order by (toStartOfTenMinutes(datetime), domain); +create table projection_test (`sum(block_count)` UInt64, domain_alias UInt64 alias length(domain), datetime DateTime, domain LowCardinality(String), x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64), projection p (select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration), count(), sum(block_count) / sum(duration), avg(block_count / duration), sum(buffer_time) / sum(duration), avg(buffer_time / duration), sum(valid_bytes) / sum(total_bytes), sum(completed_bytes) / sum(total_bytes), sum(fixed_bytes) / sum(total_bytes), sum(force_bytes) / sum(total_bytes), sum(valid_bytes) / sum(total_bytes), sum(retry_count) / sum(duration), avg(retry_count / duration), countIf(block_count > 0) / count(), countIf(first_time = 0) / count(), uniqHLL12(x_id), uniqHLL12(y_id) group by dt_m, domain)) engine MergeTree partition by toDate(datetime) order by (toStartOfTenMinutes(datetime), domain) settings index_granularity_bytes = 10000000; insert into projection_test with rowNumberInAllBlocks() as id select 1, toDateTime('2020-10-24 00:00:00') + (id / 20), toString(id % 100), * from generateRandom('x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64)', 10, 10, 1) limit 1000 settings max_threads = 1; From 48fc545e68fc86cb903a2272f1a62640429bfde2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Mar 2023 17:27:57 +0100 Subject: [PATCH 157/333] Update 01710_projections.sql --- tests/queries/0_stateless/01710_projections.sql | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01710_projections.sql b/tests/queries/0_stateless/01710_projections.sql index 146b46aed38..5097a88c8fa 100644 --- a/tests/queries/0_stateless/01710_projections.sql +++ b/tests/queries/0_stateless/01710_projections.sql @@ -1,6 +1,6 @@ drop table if exists projection_test; -create table projection_test (`sum(block_count)` UInt64, domain_alias UInt64 alias length(domain), datetime DateTime, domain LowCardinality(String), x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64), projection p (select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration), count(), sum(block_count) / sum(duration), avg(block_count / duration), sum(buffer_time) / sum(duration), avg(buffer_time / duration), sum(valid_bytes) / sum(total_bytes), sum(completed_bytes) / sum(total_bytes), sum(fixed_bytes) / sum(total_bytes), sum(force_bytes) / sum(total_bytes), sum(valid_bytes) / sum(total_bytes), sum(retry_count) / sum(duration), avg(retry_count / duration), countIf(block_count > 0) / count(), countIf(first_time = 0) / count(), uniqHLL12(x_id), uniqHLL12(y_id) group by dt_m, domain)) engine MergeTree partition by toDate(datetime) order by (toStartOfTenMinutes(datetime), domain) settings index_granularity_bytes = 10000000; +create table projection_test (`sum(block_count)` UInt64, domain_alias UInt64 alias length(domain), datetime DateTime, domain LowCardinality(String), x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64), projection p (select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration), count(), sum(block_count) / sum(duration), avg(block_count / duration), sum(buffer_time) / sum(duration), avg(buffer_time / duration), sum(valid_bytes) / sum(total_bytes), sum(completed_bytes) / sum(total_bytes), sum(fixed_bytes) / sum(total_bytes), sum(force_bytes) / sum(total_bytes), sum(valid_bytes) / sum(total_bytes), sum(retry_count) / sum(duration), avg(retry_count / duration), countIf(block_count > 0) / count(), countIf(first_time = 0) / count(), uniqHLL12(x_id), uniqHLL12(y_id) group by dt_m, domain)) engine MergeTree partition by toDate(datetime) order by (toStartOfTenMinutes(datetime), domain) settings index_granularity_bytes = 10000000; insert into projection_test with rowNumberInAllBlocks() as id select 1, toDateTime('2020-10-24 00:00:00') + (id / 20), toString(id % 100), * from generateRandom('x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64)', 10, 10, 1) limit 1000 settings max_threads = 1; From 73a6c920d3dd595d6771e6801e91c5fa0bbf9446 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Thu, 2 Mar 2023 11:31:22 -0500 Subject: [PATCH 158/333] Update date-time-functions.md closes #42758 --- .../functions/date-time-functions.md | 39 +++++++++++++++++-- 1 file changed, 36 insertions(+), 3 
deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index f6af8abcbaf..a6043d2507e 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1126,15 +1126,48 @@ Rounds the time to the half hour. ## toYYYYMM -Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). +Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. + +### example +```sql +SELECT + toYYYYMM(now(), 'US/Eastern') +``` +```response +┌─toYYYYMM(now(), 'US/Eastern')─┐ +│ 202303 │ +└───────────────────────────────┘ +``` ## toYYYYMMDD -Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD). +Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. + +### example +```sql +SELECT + toYYYYMMDD(now(), 'US/Eastern') +``` +```response +┌─toYYYYMMDD(now(), 'US/Eastern')─┐ +│ 20230302 │ +└─────────────────────────────────┘ +``` ## toYYYYMMDDhhmmss -Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). +Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. + +### example +```sql +SELECT + toYYYYMMDDhhmmss(now(), 'US/Eastern') +``` +```response +┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐ +│ 20230302112209 │ +└───────────────────────────────────────┘ +``` ## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters From 63eeec1785c7fa6553e93f5cbe0d94ce0647d292 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 2 Mar 2023 17:44:48 +0100 Subject: [PATCH 159/333] Mark 01771_bloom_filter_not_has as no-parallel and long This test can take long enough time in debug build > 10min, and because clickhouse-test does not wait the test in this case and simply call DROP DATABASE, this will eventually lead to error during DROP since the table is still in use. 
CI: https://s3.amazonaws.com/clickhouse-test-reports/45491/99329d868232d9377d7f808763e951e6f15fd71c/stateless_tests__debug__%5B5/5%5D.html Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/01771_bloom_filter_not_has.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01771_bloom_filter_not_has.sql b/tests/queries/0_stateless/01771_bloom_filter_not_has.sql index ab0e3d308f9..f945cbde56b 100644 --- a/tests/queries/0_stateless/01771_bloom_filter_not_has.sql +++ b/tests/queries/0_stateless/01771_bloom_filter_not_has.sql @@ -1,3 +1,4 @@ +-- Tags: no-parallel, long DROP TABLE IF EXISTS bloom_filter_null_array; CREATE TABLE bloom_filter_null_array (v Array(Int32), INDEX idx v TYPE bloom_filter GRANULARITY 3) ENGINE = MergeTree() ORDER BY v; INSERT INTO bloom_filter_null_array SELECT [number] FROM numbers(10000000); From 14351922e3f250c4c92383462f701fcaef198054 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 2 Mar 2023 17:20:29 +0000 Subject: [PATCH 160/333] Fix possible deadlock in QueryStatus --- src/Interpreters/ProcessList.cpp | 58 ++++++++++++++++++++++++++++---- src/Interpreters/ProcessList.h | 16 ++++++++- 2 files changed, 66 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 49d7989ac5e..52674dc1c77 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -373,6 +373,19 @@ QueryStatus::~QueryStatus() } } +void QueryStatus::ExecutorHolder::cancel() +{ + std::lock_guard lock(mutex); + if (executor) + executor->cancel(); +} + +void QueryStatus::ExecutorHolder::remove() +{ + std::lock_guard lock(mutex); + executor = nullptr; +} + CancellationCode QueryStatus::cancelQuery(bool) { if (is_killed.load()) @@ -380,9 +393,37 @@ CancellationCode QueryStatus::cancelQuery(bool) is_killed.store(true); - std::lock_guard lock(executors_mutex); - for (auto * e : executors) - e->cancel(); + std::unique_lock lock(executors_mutex); + + /// Track all cancelled executors. + std::unordered_set cancelled; + /// We cancel executors from the left to the right, so if the last executor + /// was cancelled, then all executors were cancelled. + while (!cancelled.contains(executors.back().get())) + { + size_t size = executors.size(); + /// We should create a copy of executor holder, because it can be + /// removed from vector in removePipelineExecutor from another thread + /// and reference will be invalid. + for (auto e : executors) + { + if (cancelled.contains(e.get())) + continue; + /// We should call cancel() with unlocked executors_mutex, because + /// cancel() can try to lock some internal mutex that is already locked by query executing + /// thread, and query executing thread can call removePipelineExecutor and lock executors_mutex, + /// which will lead to deadlock. + lock.unlock(); + e->cancel(); + lock.lock(); + cancelled.insert(e.get()); + /// While executors_mutex was unlocked, removePipelineExecutor could be called and + /// the size of executors could have changed. In this case we should start iterating + /// over it again to avoid using invalid iterators. 
+ if (executors.size() != size) + break; + } + } return CancellationCode::CancelSent; } @@ -396,15 +437,18 @@ void QueryStatus::addPipelineExecutor(PipelineExecutor * e) throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled"); std::lock_guard lock(executors_mutex); - assert(std::find(executors.begin(), executors.end(), e) == executors.end()); - executors.push_back(e); + assert(std::find_if(executors.begin(), executors.end(), [e](const ExecutorHolderPtr & x){ return x->executor == e; }) == executors.end()); + executors.push_back(std::make_shared(e)); } void QueryStatus::removePipelineExecutor(PipelineExecutor * e) { std::lock_guard lock(executors_mutex); - assert(std::find(executors.begin(), executors.end(), e) != executors.end()); - std::erase_if(executors, [e](PipelineExecutor * x) { return x == e; }); + auto it = std::find_if(executors.begin(), executors.end(), [e](const ExecutorHolderPtr & x){ return x->executor == e; }); + assert(it != executors.end()); + /// Invalidate executor pointer inside holder. + (*it)->remove(); + executors.erase(it); } bool QueryStatus::checkTimeLimit() diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index d5c136ab62a..30bfde4e218 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -119,8 +119,22 @@ protected: mutable std::mutex executors_mutex; + struct ExecutorHolder + { + ExecutorHolder(PipelineExecutor * e) : executor(e) {} + + void cancel(); + + void remove(); + + PipelineExecutor * executor; + std::mutex mutex; + }; + + using ExecutorHolderPtr = std::shared_ptr; + /// Array of PipelineExecutors to be cancelled when a cancelQuery is received - std::vector executors; + std::vector executors; enum QueryStreamsStatus { From fc8ea01c2b2a198d2170f8c4f9ca775f5d3a37b4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 2 Mar 2023 18:23:31 +0100 Subject: [PATCH 161/333] use unique names and paths in test_replicated_database --- .../test_replicated_database/test.py | 249 +++++++++--------- 1 file changed, 126 insertions(+), 123 deletions(-) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index ead9a762b1b..c71c76244ce 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -80,15 +80,15 @@ def started_cluster(): def test_create_replicated_table(started_cluster): main_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica' || '1');" + "CREATE DATABASE create_replicated_table ENGINE = Replicated('/test/create_replicated_table', 'shard1', 'replica' || '1');" ) dummy_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');" + "CREATE DATABASE create_replicated_table ENGINE = Replicated('/test/create_replicated_table', 'shard1', 'replica2');" ) assert ( "Explicit zookeeper_path and replica_name are specified" in main_node.query_and_get_error( - "CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) " + "CREATE TABLE create_replicated_table.replicated_table (d Date, k UInt64, i32 Int32) " "ENGINE=ReplicatedMergeTree('/test/tmp', 'r') ORDER BY k PARTITION BY toYYYYMM(d);" ) ) @@ -96,7 +96,7 @@ def test_create_replicated_table(started_cluster): assert ( "Explicit zookeeper_path and replica_name are specified" in main_node.query_and_get_error( - "CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) " + "CREATE TABLE 
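The locking pattern used by the QueryStatus deadlock fix above can be sketched language-agnostically; the Python version below is only an illustration of the idea (release the executors mutex before calling cancel(), remember which executors were already cancelled, and re-scan the list if it changed while the lock was released). The key property is that no executor callback runs while the mutex is held, which removes the lock-order inversion with removePipelineExecutor.

```python
import threading

executors_mutex = threading.Lock()
executors = []   # holders are appended/removed by the query thread under executors_mutex


def cancel_all_executors():
    cancelled = set()
    with executors_mutex:
        while executors and executors[-1] not in cancelled:
            size = len(executors)
            for holder in list(executors):          # copy: the list may shrink while unlocked
                if holder in cancelled:
                    continue
                executors_mutex.release()           # cancel() may take locks held by the query thread
                try:
                    holder.cancel()
                finally:
                    executors_mutex.acquire()
                cancelled.add(holder)
                if len(executors) != size:          # the list changed while unlocked: restart the scan
                    break
```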
create_replicated_table.replicated_table (d Date, k UInt64, i32 Int32) " "ENGINE=ReplicatedMergeTree('/test/tmp', 'r') ORDER BY k PARTITION BY toYYYYMM(d);" ) ) @@ -104,39 +104,39 @@ def test_create_replicated_table(started_cluster): assert ( "This syntax for *MergeTree engine is deprecated" in main_node.query_and_get_error( - "CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) " + "CREATE TABLE create_replicated_table.replicated_table (d Date, k UInt64, i32 Int32) " "ENGINE=ReplicatedMergeTree('/test/tmp/{shard}', '{replica}', d, k, 8192);" ) ) main_node.query( - "CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);" + "CREATE TABLE create_replicated_table.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);" ) expected = ( - "CREATE TABLE testdb.replicated_table\\n(\\n `d` Date,\\n `k` UInt64,\\n `i32` Int32\\n)\\n" + "CREATE TABLE create_replicated_table.replicated_table\\n(\\n `d` Date,\\n `k` UInt64,\\n `i32` Int32\\n)\\n" "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/{uuid}/{shard}\\', \\'{replica}\\')\\n" "PARTITION BY toYYYYMM(d)\\nORDER BY k\\nSETTINGS index_granularity = 8192" ) - assert_create_query([main_node, dummy_node], "testdb.replicated_table", expected) + assert_create_query([main_node, dummy_node], "create_replicated_table.replicated_table", expected) # assert without replacing uuid - assert main_node.query("show create testdb.replicated_table") == dummy_node.query( - "show create testdb.replicated_table" + assert main_node.query("show create create_replicated_table.replicated_table") == dummy_node.query( + "show create create_replicated_table.replicated_table" ) - main_node.query("DROP DATABASE testdb SYNC") - dummy_node.query("DROP DATABASE testdb SYNC") + main_node.query("DROP DATABASE create_replicated_table SYNC") + dummy_node.query("DROP DATABASE create_replicated_table SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_simple_alter_table(started_cluster, engine): main_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');" + "CREATE DATABASE test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');" + "CREATE DATABASE test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica2');" ) # test_simple_alter_table - name = "testdb.alter_test_{}".format(engine) + name = "test_simple_alter_table.alter_test_{}".format(engine) main_node.query( "CREATE TABLE {} " "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " @@ -184,10 +184,10 @@ def test_simple_alter_table(started_cluster, engine): # test_create_replica_after_delay competing_node.query( - "CREATE DATABASE IF NOT EXISTS testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');" + "CREATE DATABASE IF NOT EXISTS test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica3');" ) - name = "testdb.alter_test_{}".format(engine) + name = "test_simple_alter_table.alter_test_{}".format(engine) main_node.query("ALTER TABLE {} ADD COLUMN Added3 UInt32;".format(name)) main_node.query("ALTER TABLE {} DROP COLUMN AddedNested1;".format(name)) main_node.query("ALTER TABLE {} 
RENAME COLUMN Added1 TO AddedNested1;".format(name)) @@ -207,21 +207,21 @@ def test_simple_alter_table(started_cluster, engine): ) assert_create_query([main_node, dummy_node, competing_node], name, expected) - main_node.query("DROP DATABASE testdb SYNC") - dummy_node.query("DROP DATABASE testdb SYNC") - competing_node.query("DROP DATABASE testdb SYNC") + main_node.query("DROP DATABASE test_simple_alter_table SYNC") + dummy_node.query("DROP DATABASE test_simple_alter_table SYNC") + competing_node.query("DROP DATABASE test_simple_alter_table SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_delete_from_table(started_cluster, engine): main_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');" + "CREATE DATABASE delete_from_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard2', 'replica1');" + "CREATE DATABASE delete_from_table ENGINE = Replicated('/test/simple_alter_table', 'shard2', 'replica1');" ) - name = "testdb.delete_test_{}".format(engine) + name = "delete_from_table.delete_test_{}".format(engine) main_node.query( "CREATE TABLE {} " "(id UInt64, value String) " @@ -238,7 +238,7 @@ def test_delete_from_table(started_cluster, engine): table_for_select = name if not "Replicated" in engine: - table_for_select = "cluster('testdb', {})".format(name) + table_for_select = "cluster('delete_from_table', {})".format(name) for node in [main_node, dummy_node]: assert_eq_with_retry( node, @@ -246,8 +246,8 @@ def test_delete_from_table(started_cluster, engine): expected, ) - main_node.query("DROP DATABASE testdb SYNC") - dummy_node.query("DROP DATABASE testdb SYNC") + main_node.query("DROP DATABASE delete_from_table SYNC") + dummy_node.query("DROP DATABASE delete_from_table SYNC") def get_table_uuid(database, name): @@ -276,17 +276,17 @@ def fixture_attachable_part(started_cluster): @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_attach(started_cluster, attachable_part, engine): main_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');" + "CREATE DATABASE alter_attach ENGINE = Replicated('/test/alter_attach', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');" + "CREATE DATABASE alter_attach ENGINE = Replicated('/test/alter_attach', 'shard1', 'replica2');" ) name = "alter_attach_test_{}".format(engine) main_node.query( - f"CREATE TABLE testdb.{name} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE alter_attach.{name} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - table_uuid = get_table_uuid("testdb", name) + table_uuid = get_table_uuid("alter_attach", name) # Provide and attach a part to the main node shutil.copytree( attachable_part, @@ -295,113 +295,113 @@ def test_alter_attach(started_cluster, attachable_part, engine): f"database/store/{table_uuid[:3]}/{table_uuid}/detached/all_1_1_0", ), ) - main_node.query(f"ALTER TABLE testdb.{name} ATTACH PART 'all_1_1_0'") + main_node.query(f"ALTER TABLE alter_attach.{name} ATTACH PART 'all_1_1_0'") # On the main node, data is attached - assert main_node.query(f"SELECT CounterID FROM testdb.{name}") == "123\n" + assert main_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "123\n" # On the other 
node, data is replicated only if using a Replicated table engine if engine == "ReplicatedMergeTree": - assert dummy_node.query(f"SELECT CounterID FROM testdb.{name}") == "123\n" + assert dummy_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "123\n" else: - assert dummy_node.query(f"SELECT CounterID FROM testdb.{name}") == "" - main_node.query("DROP DATABASE testdb SYNC") - dummy_node.query("DROP DATABASE testdb SYNC") + assert dummy_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "" + main_node.query("DROP DATABASE alter_attach SYNC") + dummy_node.query("DROP DATABASE alter_attach SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_part(started_cluster, engine): main_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');" + "CREATE DATABASE alter_drop_part ENGINE = Replicated('/test/alter_drop_part', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');" + "CREATE DATABASE alter_drop_part ENGINE = Replicated('/test/alter_drop_part', 'shard1', 'replica2');" ) table = f"alter_drop_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE testdb.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE alter_drop_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO testdb.{table} VALUES (123)") + main_node.query(f"INSERT INTO alter_drop_part.{table} VALUES (123)") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO testdb.{table} VALUES (456)") - main_node.query(f"ALTER TABLE testdb.{table} DROP PART '{part_name}'") - assert main_node.query(f"SELECT CounterID FROM testdb.{table}") == "" + dummy_node.query(f"INSERT INTO alter_drop_part.{table} VALUES (456)") + main_node.query(f"ALTER TABLE alter_drop_part.{table} DROP PART '{part_name}'") + assert main_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") == "" if engine == "ReplicatedMergeTree": # The DROP operation is still replicated at the table engine level - assert dummy_node.query(f"SELECT CounterID FROM testdb.{table}") == "" + assert dummy_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") == "" else: - assert dummy_node.query(f"SELECT CounterID FROM testdb.{table}") == "456\n" - main_node.query("DROP DATABASE testdb SYNC") - dummy_node.query("DROP DATABASE testdb SYNC") + assert dummy_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") == "456\n" + main_node.query("DROP DATABASE alter_drop_part SYNC") + dummy_node.query("DROP DATABASE alter_drop_part SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_detach_part(started_cluster, engine): main_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');" + "CREATE DATABASE alter_detach_part ENGINE = Replicated('/test/alter_detach_part', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');" + "CREATE DATABASE alter_detach_part ENGINE = Replicated('/test/alter_detach_part', 'shard1', 'replica2');" ) table = f"alter_detach_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE testdb.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + 
f"CREATE TABLE alter_detach_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO testdb.{table} VALUES (123)") + main_node.query(f"INSERT INTO alter_detach_part.{table} VALUES (123)") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO testdb.{table} VALUES (456)") - main_node.query(f"ALTER TABLE testdb.{table} DETACH PART '{part_name}'") - detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='testdb' AND table='{table}'" + dummy_node.query(f"INSERT INTO alter_detach_part.{table} VALUES (456)") + main_node.query(f"ALTER TABLE alter_detach_part.{table} DETACH PART '{part_name}'") + detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='alter_detach_part' AND table='{table}'" assert main_node.query(detached_parts_query) == f"{part_name}\n" if engine == "ReplicatedMergeTree": # The detach operation is still replicated at the table engine level assert dummy_node.query(detached_parts_query) == f"{part_name}\n" else: assert dummy_node.query(detached_parts_query) == "" - main_node.query("DROP DATABASE testdb SYNC") - dummy_node.query("DROP DATABASE testdb SYNC") + main_node.query("DROP DATABASE alter_detach_part SYNC") + dummy_node.query("DROP DATABASE alter_detach_part SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_detached_part(started_cluster, engine): main_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');" + "CREATE DATABASE alter_drop_detached_part ENGINE = Replicated('/test/alter_drop_detached_part', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');" + "CREATE DATABASE alter_drop_detached_part ENGINE = Replicated('/test/alter_drop_detached_part', 'shard1', 'replica2');" ) table = f"alter_drop_detached_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE testdb.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE alter_drop_detached_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO testdb.{table} VALUES (123)") - main_node.query(f"ALTER TABLE testdb.{table} DETACH PART '{part_name}'") + main_node.query(f"INSERT INTO alter_drop_detached_part.{table} VALUES (123)") + main_node.query(f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART '{part_name}'") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO testdb.{table} VALUES (456)") - dummy_node.query(f"ALTER TABLE testdb.{table} DETACH PART '{part_name}'") - main_node.query(f"ALTER TABLE testdb.{table} DROP DETACHED PART '{part_name}'") - detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='testdb' AND table='{table}'" + dummy_node.query(f"INSERT INTO alter_drop_detached_part.{table} VALUES (456)") + dummy_node.query(f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART '{part_name}'") + main_node.query(f"ALTER TABLE alter_drop_detached_part.{table} DROP DETACHED PART '{part_name}'") + detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='alter_drop_detached_part' AND table='{table}'" assert main_node.query(detached_parts_query) == "" assert dummy_node.query(detached_parts_query) == f"{part_name}\n" - main_node.query("DROP DATABASE testdb SYNC") - dummy_node.query("DROP DATABASE testdb SYNC") + 
main_node.query("DROP DATABASE alter_drop_detached_part SYNC") + dummy_node.query("DROP DATABASE alter_drop_detached_part SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_partition(started_cluster, engine): main_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/clickhouse/databases/test_alter_drop_partition', 'shard1', 'replica1');" + "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/clickhouse/databases/test_alter_drop_partition', 'shard1', 'replica2');" + "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard1', 'replica2');" ) snapshotting_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/clickhouse/databases/test_alter_drop_partition', 'shard2', 'replica1');" + "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard2', 'replica1');" ) table = f"alter_drop_partition.alter_drop_{engine}" @@ -430,52 +430,52 @@ def test_alter_drop_partition(started_cluster, engine): def test_alter_fetch(started_cluster): main_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');" + "CREATE DATABASE alter_fetch ENGINE = Replicated('/test/alter_fetch', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');" + "CREATE DATABASE alter_fetch ENGINE = Replicated('/test/alter_fetch', 'shard1', 'replica2');" ) main_node.query( - "CREATE TABLE testdb.fetch_source (CounterID UInt32) ENGINE = ReplicatedMergeTree ORDER BY (CounterID)" + "CREATE TABLE alter_fetch.fetch_source (CounterID UInt32) ENGINE = ReplicatedMergeTree ORDER BY (CounterID)" ) main_node.query( - "CREATE TABLE testdb.fetch_target (CounterID UInt32) ENGINE = ReplicatedMergeTree ORDER BY (CounterID)" + "CREATE TABLE alter_fetch.fetch_target (CounterID UInt32) ENGINE = ReplicatedMergeTree ORDER BY (CounterID)" ) - main_node.query("INSERT INTO testdb.fetch_source VALUES (123)") - table_uuid = get_table_uuid("testdb", "fetch_source") + main_node.query("INSERT INTO alter_fetch.fetch_source VALUES (123)") + table_uuid = get_table_uuid("alter_fetch", "fetch_source") main_node.query( - f"ALTER TABLE testdb.fetch_target FETCH PART 'all_0_0_0' FROM '/clickhouse/tables/{table_uuid}/{{shard}}' " + f"ALTER TABLE alter_fetch.fetch_target FETCH PART 'all_0_0_0' FROM '/clickhouse/tables/{table_uuid}/{{shard}}' " ) - detached_parts_query = "SELECT name FROM system.detached_parts WHERE database='testdb' AND table='fetch_target'" + detached_parts_query = "SELECT name FROM system.detached_parts WHERE database='alter_fetch' AND table='fetch_target'" assert main_node.query(detached_parts_query) == "all_0_0_0\n" assert dummy_node.query(detached_parts_query) == "" - main_node.query("DROP DATABASE testdb SYNC") - dummy_node.query("DROP DATABASE testdb SYNC") + main_node.query("DROP DATABASE alter_fetch SYNC") + dummy_node.query("DROP DATABASE alter_fetch SYNC") def test_alters_from_different_replicas(started_cluster): main_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');" + "CREATE DATABASE alters_from_different_replicas ENGINE = Replicated('/test/alters_from_different_replicas', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE testdb ENGINE 
= Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');" + "CREATE DATABASE alters_from_different_replicas ENGINE = Replicated('/test/alters_from_different_replicas', 'shard1', 'replica2');" ) # test_alters_from_different_replicas competing_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');" + "CREATE DATABASE alters_from_different_replicas ENGINE = Replicated('/test/alters_from_different_replicas', 'shard1', 'replica3');" ) main_node.query( - "CREATE TABLE testdb.concurrent_test " + "CREATE TABLE alters_from_different_replicas.concurrent_test " "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " "ENGINE = MergeTree PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID);" ) main_node.query( - "CREATE TABLE testdb.dist AS testdb.concurrent_test ENGINE = Distributed(testdb, testdb, concurrent_test, CounterID)" + "CREATE TABLE alters_from_different_replicas.dist AS alters_from_different_replicas.concurrent_test ENGINE = Distributed(alters_from_different_replicas, alters_from_different_replicas, concurrent_test, CounterID)" ) dummy_node.stop_clickhouse(kill=True) @@ -484,7 +484,7 @@ def test_alters_from_different_replicas(started_cluster): assert ( "There are 1 unfinished hosts (0 of them are currently active)" in competing_node.query_and_get_error( - "ALTER TABLE testdb.concurrent_test ADD COLUMN Added0 UInt32;", + "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;", settings=settings, ) ) @@ -493,7 +493,7 @@ def test_alters_from_different_replicas(started_cluster): "distributed_ddl_output_mode": "null_status_on_timeout", } assert "shard1\treplica2\tQUEUED\t" in main_node.query( - "ALTER TABLE testdb.concurrent_test ADD COLUMN Added2 UInt32;", + "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added2 UInt32;", settings=settings, ) settings = { @@ -501,22 +501,22 @@ def test_alters_from_different_replicas(started_cluster): "distributed_ddl_output_mode": "never_throw", } assert "shard1\treplica2\tQUEUED\t" in competing_node.query( - "ALTER TABLE testdb.concurrent_test ADD COLUMN Added1 UInt32 AFTER Added0;", + "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added1 UInt32 AFTER Added0;", settings=settings, ) dummy_node.start_clickhouse() main_node.query( - "ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;" + "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;" ) competing_node.query( - "ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;" + "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;" ) main_node.query( - "ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;" + "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;" ) expected = ( - "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" + "CREATE TABLE alters_from_different_replicas.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n 
`ToDrop` UInt32,\\n" " `Added0` UInt32,\\n `Added1` UInt32,\\n `Added2` UInt32,\\n `AddedNested1.A` Array(UInt32),\\n" " `AddedNested1.B` Array(UInt64),\\n `AddedNested1.C` Array(String),\\n `AddedNested2.A` Array(UInt32),\\n" @@ -524,51 +524,51 @@ def test_alters_from_different_replicas(started_cluster): "ENGINE = MergeTree\\nPARTITION BY toYYYYMM(StartDate)\\nORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)\\nSETTINGS index_granularity = 8192" ) - assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected) + assert_create_query([main_node, competing_node], "alters_from_different_replicas.concurrent_test", expected) # test_create_replica_after_delay - main_node.query("DROP TABLE testdb.concurrent_test SYNC") + main_node.query("DROP TABLE alters_from_different_replicas.concurrent_test SYNC") main_node.query( - "CREATE TABLE testdb.concurrent_test " + "CREATE TABLE alters_from_different_replicas.concurrent_test " "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " "ENGINE = ReplicatedMergeTree ORDER BY CounterID;" ) expected = ( - "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" + "CREATE TABLE alters_from_different_replicas.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n `ToDrop` UInt32\\n)\\n" "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/{uuid}/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192" ) - assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected) + assert_create_query([main_node, competing_node], "alters_from_different_replicas.concurrent_test", expected) main_node.query( - "INSERT INTO testdb.dist (CounterID, StartDate, UserID) SELECT number, addDays(toDate('2020-02-02'), number), intHash32(number) FROM numbers(10)" + "INSERT INTO alters_from_different_replicas.dist (CounterID, StartDate, UserID) SELECT number, addDays(toDate('2020-02-02'), number), intHash32(number) FROM numbers(10)" ) # test_replica_restart main_node.restart_clickhouse() expected = ( - "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" + "CREATE TABLE alters_from_different_replicas.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n `ToDrop` UInt32\\n)\\n" "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/{uuid}/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192" ) # test_snapshot_and_snapshot_recover snapshotting_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard2', 'replica1');" + "CREATE DATABASE alters_from_different_replicas ENGINE = Replicated('/test/alters_from_different_replicas', 'shard2', 'replica1');" ) snapshot_recovering_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard2', 'replica2');" + "CREATE DATABASE alters_from_different_replicas ENGINE = Replicated('/test/alters_from_different_replicas', 'shard2', 'replica2');" ) - assert_create_query(all_nodes, "testdb.concurrent_test", expected) + assert_create_query(all_nodes, "alters_from_different_replicas.concurrent_test", expected) - main_node.query("SYSTEM FLUSH DISTRIBUTED 
testdb.dist") + main_node.query("SYSTEM FLUSH DISTRIBUTED alters_from_different_replicas.dist") main_node.query( - "ALTER TABLE testdb.concurrent_test UPDATE StartDate = addYears(StartDate, 1) WHERE 1" + "ALTER TABLE alters_from_different_replicas.concurrent_test UPDATE StartDate = addYears(StartDate, 1) WHERE 1" ) - res = main_node.query("ALTER TABLE testdb.concurrent_test DELETE WHERE UserID % 2") + res = main_node.query("ALTER TABLE alters_from_different_replicas.concurrent_test DELETE WHERE UserID % 2") assert ( "shard1\treplica1\tOK" in res and "shard1\treplica2\tOK" in res @@ -585,28 +585,28 @@ def test_alters_from_different_replicas(started_cluster): ) assert ( main_node.query( - "SELECT shard_num, replica_num, host_name FROM system.clusters WHERE cluster='testdb'" + "SELECT shard_num, replica_num, host_name FROM system.clusters WHERE cluster='alters_from_different_replicas'" ) == expected ) # test_drop_and_create_replica - main_node.query("DROP DATABASE testdb SYNC") + main_node.query("DROP DATABASE alters_from_different_replicas SYNC") main_node.query( - "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');" + "CREATE DATABASE alters_from_different_replicas ENGINE = Replicated('/test/alters_from_different_replicas', 'shard1', 'replica1');" ) expected = ( - "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" + "CREATE TABLE alters_from_different_replicas.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n `ToDrop` UInt32\\n)\\n" "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/{uuid}/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192" ) - assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected) - assert_create_query(all_nodes, "testdb.concurrent_test", expected) + assert_create_query([main_node, competing_node], "alters_from_different_replicas.concurrent_test", expected) + assert_create_query(all_nodes, "alters_from_different_replicas.concurrent_test", expected) for node in all_nodes: - node.query("SYSTEM SYNC REPLICA testdb.concurrent_test") + node.query("SYSTEM SYNC REPLICA alters_from_different_replicas.concurrent_test") expected = ( "0\t2021-02-02\t4249604106\n" @@ -618,14 +618,14 @@ def test_alters_from_different_replicas(started_cluster): assert_eq_with_retry( dummy_node, - "SELECT CounterID, StartDate, UserID FROM testdb.dist ORDER BY CounterID", + "SELECT CounterID, StartDate, UserID FROM alters_from_different_replicas.dist ORDER BY CounterID", expected, ) - main_node.query("DROP DATABASE testdb SYNC") - dummy_node.query("DROP DATABASE testdb SYNC") - competing_node.query("DROP DATABASE testdb SYNC") - snapshotting_node.query("DROP DATABASE testdb SYNC") - snapshot_recovering_node.query("DROP DATABASE testdb SYNC") + main_node.query("DROP DATABASE alters_from_different_replicas SYNC") + dummy_node.query("DROP DATABASE alters_from_different_replicas SYNC") + competing_node.query("DROP DATABASE alters_from_different_replicas SYNC") + snapshotting_node.query("DROP DATABASE alters_from_different_replicas SYNC") + snapshot_recovering_node.query("DROP DATABASE alters_from_different_replicas SYNC") def create_some_tables(db): @@ -1063,10 +1063,10 @@ def test_server_uuid(started_cluster): def test_sync_replica(started_cluster): main_node.query( - "CREATE DATABASE test_sync_database ENGINE = 
Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');" + "CREATE DATABASE test_sync_database ENGINE = Replicated('/test/sync_replica', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE test_sync_database ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');" + "CREATE DATABASE test_sync_database ENGINE = Replicated('/test/sync_replica', 'shard1', 'replica2');" ) number_of_tables = 1000 @@ -1113,17 +1113,20 @@ def test_sync_replica(started_cluster): ) lp1 = main_node.query( - "select value from system.zookeeper where path='/clickhouse/databases/test1/replicas/shard1|replica1' and name='log_ptr'" + "select value from system.zookeeper where path='/test/sync_replica/replicas/shard1|replica1' and name='log_ptr'" ) lp2 = main_node.query( - "select value from system.zookeeper where path='/clickhouse/databases/test1/replicas/shard1|replica2' and name='log_ptr'" + "select value from system.zookeeper where path='/test/sync_replica/replicas/shard1|replica2' and name='log_ptr'" ) max_lp = main_node.query( - "select value from system.zookeeper where path='/clickhouse/databases/test1/' and name='max_log_ptr'" + "select value from system.zookeeper where path='/test/sync_replica/' and name='max_log_ptr'" ) assert lp1 == max_lp assert lp2 == max_lp + main_node.query("DROP DATABASE test_sync_database SYNC") + dummy_node.query("DROP DATABASE test_sync_database SYNC") + def test_force_synchronous_settings(started_cluster): main_node.query( From 4457ebab48ab60d8cae6838a40e73f6e72c8b08e Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Thu, 2 Mar 2023 12:31:56 -0500 Subject: [PATCH 162/333] Update docs/en/sql-reference/functions/date-time-functions.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/en/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index a6043d2507e..9dd6ef63ec1 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1156,7 +1156,7 @@ SELECT ## toYYYYMMDDhhmmss -Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. +Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. 
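Editorial note (not part of the patch): the hunk above only touches the description of toYYYYMMDDhhmmss, and the documented example is truncated in this excerpt. A minimal sketch of the encoding described there; the explicit 'UTC' arguments just make the result independent of the server time zone:

```sql
-- year, month, day, hour, minute and second packed into a single UInt64
SELECT toYYYYMMDDhhmmss(toDateTime('2023-03-02 17:35:56', 'UTC'), 'UTC') AS packed;
-- packed = 20230302173556
```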
### example ```sql From fdcbec4fee7df1bb5a51249cef96964b5245c822 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 2 Mar 2023 17:35:56 +0000 Subject: [PATCH 163/333] Automatic style fix --- .../test_replicated_database/test.py | 57 ++++++++++++++----- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index c71c76244ce..2ab2fe499ff 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -118,11 +118,13 @@ def test_create_replicated_table(started_cluster): "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/{uuid}/{shard}\\', \\'{replica}\\')\\n" "PARTITION BY toYYYYMM(d)\\nORDER BY k\\nSETTINGS index_granularity = 8192" ) - assert_create_query([main_node, dummy_node], "create_replicated_table.replicated_table", expected) - # assert without replacing uuid - assert main_node.query("show create create_replicated_table.replicated_table") == dummy_node.query( - "show create create_replicated_table.replicated_table" + assert_create_query( + [main_node, dummy_node], "create_replicated_table.replicated_table", expected ) + # assert without replacing uuid + assert main_node.query( + "show create create_replicated_table.replicated_table" + ) == dummy_node.query("show create create_replicated_table.replicated_table") main_node.query("DROP DATABASE create_replicated_table SYNC") dummy_node.query("DROP DATABASE create_replicated_table SYNC") @@ -330,7 +332,10 @@ def test_alter_drop_part(started_cluster, engine): # The DROP operation is still replicated at the table engine level assert dummy_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") == "" else: - assert dummy_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") == "456\n" + assert ( + dummy_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") + == "456\n" + ) main_node.query("DROP DATABASE alter_drop_part SYNC") dummy_node.query("DROP DATABASE alter_drop_part SYNC") @@ -379,11 +384,17 @@ def test_alter_drop_detached_part(started_cluster, engine): f"CREATE TABLE alter_drop_detached_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) main_node.query(f"INSERT INTO alter_drop_detached_part.{table} VALUES (123)") - main_node.query(f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART '{part_name}'") + main_node.query( + f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART '{part_name}'" + ) if engine == "MergeTree": dummy_node.query(f"INSERT INTO alter_drop_detached_part.{table} VALUES (456)") - dummy_node.query(f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART '{part_name}'") - main_node.query(f"ALTER TABLE alter_drop_detached_part.{table} DROP DETACHED PART '{part_name}'") + dummy_node.query( + f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART '{part_name}'" + ) + main_node.query( + f"ALTER TABLE alter_drop_detached_part.{table} DROP DETACHED PART '{part_name}'" + ) detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='alter_drop_detached_part' AND table='{table}'" assert main_node.query(detached_parts_query) == "" assert dummy_node.query(detached_parts_query) == f"{part_name}\n" @@ -524,7 +535,11 @@ def test_alters_from_different_replicas(started_cluster): "ENGINE = MergeTree\\nPARTITION BY toYYYYMM(StartDate)\\nORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)\\nSETTINGS index_granularity = 8192" ) - assert_create_query([main_node, 
competing_node], "alters_from_different_replicas.concurrent_test", expected) + assert_create_query( + [main_node, competing_node], + "alters_from_different_replicas.concurrent_test", + expected, + ) # test_create_replica_after_delay main_node.query("DROP TABLE alters_from_different_replicas.concurrent_test SYNC") @@ -540,7 +555,11 @@ def test_alters_from_different_replicas(started_cluster): "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/{uuid}/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192" ) - assert_create_query([main_node, competing_node], "alters_from_different_replicas.concurrent_test", expected) + assert_create_query( + [main_node, competing_node], + "alters_from_different_replicas.concurrent_test", + expected, + ) main_node.query( "INSERT INTO alters_from_different_replicas.dist (CounterID, StartDate, UserID) SELECT number, addDays(toDate('2020-02-02'), number), intHash32(number) FROM numbers(10)" @@ -562,13 +581,17 @@ def test_alters_from_different_replicas(started_cluster): snapshot_recovering_node.query( "CREATE DATABASE alters_from_different_replicas ENGINE = Replicated('/test/alters_from_different_replicas', 'shard2', 'replica2');" ) - assert_create_query(all_nodes, "alters_from_different_replicas.concurrent_test", expected) + assert_create_query( + all_nodes, "alters_from_different_replicas.concurrent_test", expected + ) main_node.query("SYSTEM FLUSH DISTRIBUTED alters_from_different_replicas.dist") main_node.query( "ALTER TABLE alters_from_different_replicas.concurrent_test UPDATE StartDate = addYears(StartDate, 1) WHERE 1" ) - res = main_node.query("ALTER TABLE alters_from_different_replicas.concurrent_test DELETE WHERE UserID % 2") + res = main_node.query( + "ALTER TABLE alters_from_different_replicas.concurrent_test DELETE WHERE UserID % 2" + ) assert ( "shard1\treplica1\tOK" in res and "shard1\treplica2\tOK" in res @@ -602,8 +625,14 @@ def test_alters_from_different_replicas(started_cluster): "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/{uuid}/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192" ) - assert_create_query([main_node, competing_node], "alters_from_different_replicas.concurrent_test", expected) - assert_create_query(all_nodes, "alters_from_different_replicas.concurrent_test", expected) + assert_create_query( + [main_node, competing_node], + "alters_from_different_replicas.concurrent_test", + expected, + ) + assert_create_query( + all_nodes, "alters_from_different_replicas.concurrent_test", expected + ) for node in all_nodes: node.query("SYSTEM SYNC REPLICA alters_from_different_replicas.concurrent_test") From 167e4903a8f5b175d587efaed15c154ee0837991 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 2 Mar 2023 18:45:59 +0100 Subject: [PATCH 164/333] Fix concrete columns PREWHERE support This is the fix for the IStorage::supportedPrewhereColumns() API. 
Signed-off-by: Azat Khuzhin --- src/Interpreters/InterpreterSelectQuery.cpp | 3 +-- .../MergeTree/MergeTreeWhereOptimizer.cpp | 16 ++++++++++++++++ src/Storages/MergeTree/MergeTreeWhereOptimizer.h | 3 +++ ...rge_prewhere_different_default_kind.reference | 9 +++++---- ...575_merge_prewhere_different_default_kind.sql | 11 +++++++---- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 318ea5fdf42..a4eed606de1 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -593,8 +593,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( Names queried_columns = syntax_analyzer_result->requiredSourceColumns(); const auto & supported_prewhere_columns = storage->supportedPrewhereColumns(); - if (supported_prewhere_columns.has_value()) - std::erase_if(queried_columns, [&](const auto & name) { return !supported_prewhere_columns->contains(name); }); MergeTreeWhereOptimizer{ current_info, @@ -602,6 +600,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( std::move(column_compressed_sizes), metadata_snapshot, queried_columns, + supported_prewhere_columns, log}; } } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 3a866cc8934..fdddc29048b 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -32,10 +32,12 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( std::unordered_map column_sizes_, const StorageMetadataPtr & metadata_snapshot, const Names & queried_columns_, + const std::optional & supported_columns_, Poco::Logger * log_) : table_columns{collections::map( metadata_snapshot->getColumns().getAllPhysical(), [](const NameAndTypePair & col) { return col.name; })} , queried_columns{queried_columns_} + , supported_columns{supported_columns_} , sorting_key_names{NameSet( metadata_snapshot->getSortingKey().column_names.begin(), metadata_snapshot->getSortingKey().column_names.end())} , block_with_constants{KeyCondition::getBlockWithConstants(query_info.query->clone(), query_info.syntax_analyzer_result, context)} @@ -195,6 +197,8 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node, && (!is_final || isExpressionOverSortingKey(node)) /// Only table columns are considered. Not array joined columns. NOTE We're assuming that aliases was expanded. && isSubsetOfTableColumns(cond.identifiers) + /// Some identifiers may be unable to support PREWHERE (usually because of different types in Merge engine) + && identifiersSupportsPrewhere(cond.identifiers) /// Do not move conditions involving all queried columns. 
&& cond.identifiers.size() < queried_columns.size(); @@ -321,6 +325,18 @@ UInt64 MergeTreeWhereOptimizer::getIdentifiersColumnSize(const NameSet & identif return size; } +bool MergeTreeWhereOptimizer::identifiersSupportsPrewhere(const NameSet & identifiers) const +{ + if (!supported_columns.has_value()) + return true; + + for (const auto & identifier : identifiers) + if (!supported_columns->contains(identifier)) + return false; + + return true; +} + bool MergeTreeWhereOptimizer::isExpressionOverSortingKey(const ASTPtr & ast) const { if (const auto * func = ast->as()) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index f37255bdbee..8953923542e 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -39,6 +39,7 @@ public: std::unordered_map column_sizes_, const StorageMetadataPtr & metadata_snapshot, const Names & queried_columns_, + const std::optional & supported_columns_, Poco::Logger * log_); private: @@ -82,6 +83,7 @@ private: void optimizeArbitrary(ASTSelectQuery & select) const; UInt64 getIdentifiersColumnSize(const NameSet & identifiers) const; + bool identifiersSupportsPrewhere(const NameSet & identifiers) const; bool isExpressionOverSortingKey(const ASTPtr & ast) const; @@ -105,6 +107,7 @@ private: const StringSet table_columns; const Names queried_columns; + const std::optional supported_columns; const NameSet sorting_key_names; const Block block_with_constants; Poco::Logger * log; diff --git a/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference index 32db2512eab..c17e235ddad 100644 --- a/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference +++ b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference @@ -1,12 +1,13 @@ -- { echoOn } -- for pure PREWHERE it is not addressed yet. SELECT * FROM m PREWHERE a = 'OK'; -OK 0 +OK 1970-01-01 0 SELECT * FROM m PREWHERE f = 0; -- { serverError ILLEGAL_PREWHERE } SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0; -OK 0 +OK 1970-01-01 0 SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; -OK 0 +OK 1970-01-01 0 -- { echoOn } SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; -OK 0 +OK 1970-01-01 0 +OK 1970-01-01 0 diff --git a/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql index 0f1d582a26e..88c7923a570 100644 --- a/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql +++ b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql @@ -6,20 +6,22 @@ DROP TABLE IF EXISTS t2; CREATE TABLE m ( - `a` String, - `f` UInt8 + a String, + date Date, + f UInt8 ) ENGINE = Merge(currentDatabase(), '^(t1|t2)$'); CREATE TABLE t1 ( a String, + date Date, f UInt8 ALIAS 0 ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192; -INSERT INTO t1 VALUES ('OK'); +INSERT INTO t1 (a) VALUES ('OK'); -- { echoOn } -- for pure PREWHERE it is not addressed yet. 
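Editorial note (not part of the patch): the test hunks above and below exercise the scenario this commit fixes. A rough sketch of the user-visible behaviour, reusing the table layout of the test: when a column such as `f` is a real column in one source table but an `ALIAS` in another, it is not a supported PREWHERE column for the whole `Merge` table, so with this change the optimizer keeps the condition in WHERE instead of moving it:

```sql
-- f is ALIAS 0 in t1 but a real column in t2, so `f = 0` must stay in WHERE
-- even when optimize_move_to_prewhere is enabled.
SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere = 1;
```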
@@ -32,12 +34,13 @@ SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; CREATE TABLE t2 ( a String, + date Date, f UInt8, ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192; -INSERT INTO t2 VALUES ('OK', 1); +INSERT INTO t2 (a) VALUES ('OK'); -- { echoOn } SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; From a0a40fb0575582ade22514dd9d95f20db887ae8b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 2 Mar 2023 17:51:43 +0000 Subject: [PATCH 165/333] nam --> name --- tests/ci/compatibility_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 398f3df3deb..688fe883c1e 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -210,7 +210,7 @@ def main(): max_glibc_version = "" if "x86" in args.check_name: max_glibc_version = "2.4" - elif "aarch64" in args.check_nam: + elif "aarch64" in args.check_name: max_glibc_version = "2.18" # because of build with newer sysroot? else: raise Exception("Can't determine max glibc version") From 2db7df61537e1eb25f356d7d7625f2907721d257 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Thu, 2 Mar 2023 12:55:04 -0500 Subject: [PATCH 166/333] Update docs/en/sql-reference/functions/date-time-functions.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/en/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 41e50977bba..1b75ceaa9b4 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1126,7 +1126,7 @@ Rounds the time to the half hour. ## toYYYYMM -Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. +Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. ### example ```sql From 9cb70723d1afd51b57e8010720dd3f47e88e23bc Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Thu, 2 Mar 2023 12:55:16 -0500 Subject: [PATCH 167/333] Update docs/en/sql-reference/functions/date-time-functions.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/en/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 1b75ceaa9b4..0892f0d7ef7 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1141,7 +1141,7 @@ SELECT ## toYYYYMMDD -Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. +Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. 
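Editorial note (not part of the patch): the two documentation hunks above only touch the toYYYYMM and toYYYYMMDD descriptions, and the examples that follow are truncated in this excerpt. A minimal illustration of the two encodings:

```sql
SELECT
    toYYYYMM(toDate('2023-03-02')) AS ym,     -- 202303   (YYYY * 100 + MM)
    toYYYYMMDD(toDate('2023-03-02')) AS ymd;  -- 20230302 (YYYY * 10000 + MM * 100 + DD)
```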
### example ```sql From fbabba78c905cca5b1c24ae4fdd2503f5870a146 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Thu, 2 Mar 2023 12:55:23 -0500 Subject: [PATCH 168/333] Update docs/en/sql-reference/functions/date-time-functions.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/en/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 0892f0d7ef7..ef0475027dd 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1156,7 +1156,7 @@ SELECT ## toYYYYMMDDhhmmss -Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. +Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. ### example ```sql From a6c2473518bf78d4ccfa2f06f2621a09d3f4d092 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 Mar 2023 18:16:03 +0100 Subject: [PATCH 169/333] do flushUntrackedMemory when context swith --- src/Common/CurrentThread.cpp | 3 +-- src/Common/ThreadStatus.cpp | 6 ++++++ src/Common/ThreadStatus.h | 2 ++ src/Interpreters/ThreadStatusExt.cpp | 4 +++- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Common/CurrentThread.cpp b/src/Common/CurrentThread.cpp index 526e28c043d..90483171304 100644 --- a/src/Common/CurrentThread.cpp +++ b/src/Common/CurrentThread.cpp @@ -129,8 +129,7 @@ void CurrentThread::flushUntrackedMemory() if (current_thread->untracked_memory == 0) return; - current_thread->memory_tracker.adjustWithUntrackedMemory(current_thread->untracked_memory); - current_thread->untracked_memory = 0; + current_thread->flushUntrackedMemory(); } } diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 46c171b5cb6..da723f5c89d 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -144,6 +144,12 @@ ThreadStatus::ThreadStatus() #endif } +void ThreadStatus::flushUntrackedMemory() +{ + memory_tracker.adjustWithUntrackedMemory(untracked_memory); + untracked_memory = 0; +} + ThreadStatus::~ThreadStatus() { memory_tracker.adjustWithUntrackedMemory(untracked_memory); diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 20550a63312..e620413c8eb 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -290,6 +290,8 @@ public: void logToQueryViewsLog(const ViewRuntimeData & vinfo); + void flushUntrackedMemory(); + protected: void applyQuerySettings(); diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 84400fc3711..b22748e84ae 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -352,8 +352,10 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) thread_group->threads.erase(this); } performance_counters.setParent(&ProfileEvents::global_counters); - memory_tracker.reset(); + flushUntrackedMemory(); + + memory_tracker.reset(); 
memory_tracker.setParent(thread_group->memory_tracker.getParent()); query_id.clear(); From 7a9f4aab8ed435008e35b2cb73f634da50fb0630 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 2 Mar 2023 19:16:42 +0100 Subject: [PATCH 170/333] do not call adjustWithUntrackedMemory when untracked_memory is 0 --- src/Common/CurrentThread.cpp | 3 --- src/Common/ThreadStatus.cpp | 5 ++++- src/Storages/MergeTree/MergeList.cpp | 7 +++++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Common/CurrentThread.cpp b/src/Common/CurrentThread.cpp index 90483171304..c10d5dbc68c 100644 --- a/src/Common/CurrentThread.cpp +++ b/src/Common/CurrentThread.cpp @@ -126,9 +126,6 @@ void CurrentThread::flushUntrackedMemory() { if (unlikely(!current_thread)) return; - if (current_thread->untracked_memory == 0) - return; - current_thread->flushUntrackedMemory(); } diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index da723f5c89d..407343aa268 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -146,13 +146,16 @@ ThreadStatus::ThreadStatus() void ThreadStatus::flushUntrackedMemory() { + if (untracked_memory == 0) + return; + memory_tracker.adjustWithUntrackedMemory(untracked_memory); untracked_memory = 0; } ThreadStatus::~ThreadStatus() { - memory_tracker.adjustWithUntrackedMemory(untracked_memory); + flushUntrackedMemory(); if (thread_group) { diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index 76d69cc6b7d..fa1887a02e6 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -144,8 +144,11 @@ MergeInfo MergeListElement::getInfo() const MergeListElement::~MergeListElement() { - CurrentThread::getMemoryTracker()->adjustWithUntrackedMemory(untracked_memory); - untracked_memory = 0; + if (untracked_memory != 0) + { + CurrentThread::getMemoryTracker()->adjustWithUntrackedMemory(untracked_memory); + untracked_memory = 0; + } } From edd238273c4d6d0a6462c6f2bc0e7d7656169bf1 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 2 Mar 2023 19:51:05 +0100 Subject: [PATCH 171/333] fix another bug in client --- src/Client/ClientBase.cpp | 4 ++-- src/Server/TCPHandler.cpp | 3 +++ .../0_stateless/02232_dist_insert_send_logs_level_hung.sh | 2 +- .../0_stateless/02434_cancel_insert_when_client_dies.sh | 3 ++- .../queries/0_stateless/02435_rollback_cancelled_queries.sh | 5 +++-- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 96aff9aa304..53eb5080130 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1360,7 +1360,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des throw; } - if (have_data_in_stdin) + if (have_data_in_stdin && !cancelled) sendDataFromStdin(sample, columns_description_for_query, parsed_query); } else if (parsed_insert_query->data) @@ -1370,7 +1370,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des try { sendDataFrom(data_in, sample, columns_description_for_query, parsed_query, have_data_in_stdin); - if (have_data_in_stdin) + if (have_data_in_stdin && !cancelled) sendDataFromStdin(sample, columns_description_for_query, parsed_query); } catch (Exception & e) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a1ef7a98992..768f16a09e0 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1335,6 +1335,7 @@ bool TCPHandler::receivePacket() 
std::this_thread::sleep_for(ms); } + LOG_INFO(log, "Received 'Cancel' packet from the client, canceling the query"); state.is_cancelled = true; return false; @@ -1378,6 +1379,7 @@ String TCPHandler::receiveReadTaskResponseAssumeLocked() { if (packet_type == Protocol::Client::Cancel) { + LOG_INFO(log, "Received 'Cancel' packet from the client, canceling the read task"); state.is_cancelled = true; /// For testing connection collector. if (unlikely(sleep_in_receive_cancel.totalMilliseconds())) @@ -1411,6 +1413,7 @@ std::optional TCPHandler::receivePartitionMergeTreeReadTas { if (packet_type == Protocol::Client::Cancel) { + LOG_INFO(log, "Received 'Cancel' packet from the client, canceling the MergeTree read task"); state.is_cancelled = true; /// For testing connection collector. if (unlikely(sleep_in_receive_cancel.totalMilliseconds())) diff --git a/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh b/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh index 322e7e73991..5ed94148bc1 100755 --- a/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh +++ b/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh @@ -49,7 +49,7 @@ insert_client_opts=( timeout 250s $CLICKHOUSE_CLIENT "${client_opts[@]}" "${insert_client_opts[@]}" -q "insert into function remote('127.2', currentDatabase(), in_02232) select * from numbers(1e6)" # Kill underlying query of remote() to make KILL faster -timeout 30s $CLICKHOUSE_CLIENT "${client_opts[@]}" -q "KILL QUERY WHERE Settings['log_comment'] = '$CLICKHOUSE_LOG_COMMENT' SYNC" --format Null +timeout 60s $CLICKHOUSE_CLIENT "${client_opts[@]}" -q "KILL QUERY WHERE Settings['log_comment'] = '$CLICKHOUSE_LOG_COMMENT' SYNC" --format Null echo $? $CLICKHOUSE_CLIENT "${client_opts[@]}" -nm -q " diff --git a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh index c4d7367ba14..85aa992d98c 100755 --- a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh +++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh @@ -93,5 +93,6 @@ $CLICKHOUSE_CLIENT -q 'system flush logs' # Ensure that thread_cancel actually did something $CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and ( - message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes') or + message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes', + 'Query was cancelled or a client has unexpectedly dropped the connection') or message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%')" diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh index 7c7ef037e02..408743d395b 100755 --- a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh @@ -47,7 +47,7 @@ function insert_data fi if [[ "$IMPLICIT" -eq 0 ]]; then - $CLICKHOUSE_CURL -sS -d 'commit' "$CLICKHOUSE_URL&$TXN_SETTINGS&close_session=1" | grep -Faq "Transaction is not in RUNNING state" && $CLICKHOUSE_CURL -sS -d 'rollback' "$CLICKHOUSE_URL&$TXN_SETTINGS" + $CLICKHOUSE_CURL -sS -d 'commit' "$CLICKHOUSE_URL&$TXN_SETTINGS&close_session=1" fi } @@ -109,7 +109,8 @@ 
$CLICKHOUSE_CLIENT --implicit_transaction=1 -q 'select throwIf(count() % 1000000 # Ensure that thread_cancel actually did something $CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and ( - message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes') or + message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes', + 'Query was cancelled or a client has unexpectedly dropped the connection') or message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%')" wait_for_queries_to_finish 30 From 547319eb371a7853a5703fcb780aec4e894dfc89 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 2 Mar 2023 19:53:47 +0100 Subject: [PATCH 172/333] fix --- tests/queries/0_stateless/02435_rollback_cancelled_queries.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh index 408743d395b..e4aec6503a4 100755 --- a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh @@ -62,7 +62,7 @@ function thread_insert # supress "Killed" messages from bash while true; do export ID="$TEST_MARK$RANDOM" - bash -c insert_data 2>&1| grep -Fav "Killed" + bash -c insert_data 2>&1| grep -Fav "Killed" | grep -Fav "SESSION_IS_LOCKED" | grep -Fav "SESSION_NOT_FOUND" done } From 31bc032e0b71e5373b14338622e7d7b2dd7b6d18 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 2 Mar 2023 19:23:00 +0000 Subject: [PATCH 173/333] Docs: Cleanup default value docs --- .../sql-reference/statements/create/table.md | 33 +++++++++---------- src/Interpreters/InterpreterInsertQuery.cpp | 4 +-- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 54977e1b0ab..4bb6430eff6 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -112,23 +112,21 @@ See also [data_type_default_nullable](../../../operations/settings/settings.md#d ## Default Values -The column description can specify an expression for a default value, in one of the following ways: `DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`. +The column description can specify a default value expression in the form of `DEFAULT expr`, `MATERIALIZED expr`, or `ALIAS expr`. Example: `URLDomain String DEFAULT domain(URL)`. -Example: `URLDomain String DEFAULT domain(URL)`. +The expression `expr` is optional. If it is omitted, the column type must be specified explicitly and the default value will be `0` for numeric columns, `''` (the empty string) for string columns, `[]` (the empty array) for array columns, `1970-01-01` for date columns, or `NULL` for nullable columns. -If an expression for the default value is not defined, the default values will be set to zeros for numbers, empty strings for strings, empty arrays for arrays, and `1970-01-01` for dates or zero unix timestamp for DateTime, NULL for Nullable. +The column type of a default value column can be omitted in which case it is infered from `expr`'s type. For example the type of column `EventDate DEFAULT toDate(EventTime)` will be date. 
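Editorial note (not part of the patch): a small sketch of the type inference described in the rewritten paragraph above, using a throwaway table name:

```sql
CREATE TABLE default_type_inference
(
    EventTime DateTime,
    EventDate DEFAULT toDate(EventTime)  -- no explicit type: inferred as Date from toDate()
)
ENGINE = MergeTree
ORDER BY EventTime;

-- DESCRIBE shows EventDate with type Date and default expression toDate(EventTime)
DESCRIBE TABLE default_type_inference;
```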
-If the default expression is defined, the column type is optional. If there isn’t an explicitly defined type, the default expression type is used. Example: `EventDate DEFAULT toDate(EventTime)` – the ‘Date’ type will be used for the ‘EventDate’ column. -If the data type and default expression are defined explicitly, this expression will be cast to the specified type using type casting functions. Example: `Hits UInt32 DEFAULT 0` means the same thing as `Hits UInt32 DEFAULT toUInt32(0)`. +If both a data type and a default value expression are specified, an implicit type casting function is inserted which converts the expression to the specified type. Example: `Hits UInt32 DEFAULT 0` is internally represented as `Hits UInt32 DEFAULT toUInt32(0)`. -Default expressions may be defined as an arbitrary expression from table constants and columns. When creating and changing the table structure, it checks that expressions do not contain loops. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed. +A default value expression `expr` may reference arbitrary table columns and constants. ClickHouse checks that changes of the table structure do not introduce loops in the expression calculation. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed. ### DEFAULT `DEFAULT expr` -Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression. +Normal default value. If the value of such a column is not specified in an INSERT query, it is computed from `expr`. Example: @@ -154,9 +152,9 @@ SELECT * FROM test; `MATERIALIZED expr` -Materialized expression. Such a column can’t be specified for INSERT, because it is always calculated. -For an INSERT without a list of columns, these columns are not considered. -In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. +Materialized expression. Values of such columns are always calculated; they cannot be specified in INSERT queries. + +Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`. Example: @@ -192,8 +190,9 @@ SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1; `EPHEMERAL [expr]` -Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required. -INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. +Ephemeral column. Columns of this type are not stored in the table and it is not possible to SELECT from them. The only purpose of ephemeral columns is to build default value expressions of other columns from them. + +An insert without explicitly specified columns will skip columns of this type. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. 
Example: @@ -205,7 +204,7 @@ CREATE OR REPLACE TABLE test hexed FixedString(4) DEFAULT unhex(unhexed) ) ENGINE = MergeTree -ORDER BY id +ORDER BY id; INSERT INTO test (id, unhexed) Values (1, '5a90b714'); @@ -227,9 +226,9 @@ hex(hexed): 5A90B714 `ALIAS expr` -Synonym. Such a column isn’t stored in the table at all. -Its values can’t be inserted in a table, and it is not substituted when using an asterisk in a SELECT query. -It can be used in SELECTs if the alias is expanded during query parsing. +Calculated columns (synonym). Columns of this type are not stored in the table and it is not possible to INSERT values into them. + +When SELECT queries explicitly reference columns of this type, the value is computed at query time from `expr`. By default, `SELECT *` excludes ALIAS columns. This behavior can be disabled with setting `asterisk_include_alias_columns`. When using the ALTER query to add new columns, old data for these columns is not written. Instead, when reading old data that does not have values for the new columns, expressions are computed on the fly by default. However, if running the expressions requires different columns that are not indicated in the query, these columns will additionally be read, but only for the blocks of data that need it. diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index b4a19ea7403..3cae219fa60 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -161,7 +161,7 @@ Block InterpreterInsertQuery::getSampleBlock( if (table_sample_physical.has(current_name)) { if (!allow_materialized) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is a MATERIALIZED column", current_name); res.insert(ColumnWithTypeAndName(table_sample_physical.getByName(current_name).type, current_name)); } @@ -527,7 +527,7 @@ BlockIO InterpreterInsertQuery::execute() { for (const auto & column : metadata_snapshot->getColumns()) if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is a MATERIALIZED column", column.name); } res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); From c848052c7cb00e776aaec7087f735c28ac8b878e Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 2 Mar 2023 19:25:50 +0000 Subject: [PATCH 174/333] Add test, rename some tests to fix order --- ...reference => 02581_width_bucket.reference} | 0 ...idth_bucket.sql => 02581_width_bucket.sql} | 0 ...join_subquery_empty_column_list.reference} | 0 ...lyzer_join_subquery_empty_column_list.sql} | 0 ..._async_reading_with_small_limit.reference} | 0 ... 
02582_async_reading_with_small_limit.sql} | 0 ...rence => 02583_map_literal_cast.reference} | 0 ...al_cast.sql => 02583_map_literal_cast.sql} | 0 ...4.reference => 02584_range_ipv4.reference} | 0 ...74_range_ipv4.sql => 02584_range_ipv4.sql} | 0 .../02585_query_status_deadlock.reference | 0 .../02585_query_status_deadlock.sh | 24 +++++++++++++++++++ 12 files changed, 24 insertions(+) rename tests/queries/0_stateless/{25337_width_bucket.reference => 02581_width_bucket.reference} (100%) rename tests/queries/0_stateless/{25337_width_bucket.sql => 02581_width_bucket.sql} (100%) rename tests/queries/0_stateless/{25339_analyzer_join_subquery_empty_column_list.reference => 02582_analyzer_join_subquery_empty_column_list.reference} (100%) rename tests/queries/0_stateless/{25339_analyzer_join_subquery_empty_column_list.sql => 02582_analyzer_join_subquery_empty_column_list.sql} (100%) rename tests/queries/0_stateless/{02664_async_reading_with_small_limit.reference => 02582_async_reading_with_small_limit.reference} (100%) rename tests/queries/0_stateless/{02664_async_reading_with_small_limit.sql => 02582_async_reading_with_small_limit.sql} (100%) rename tests/queries/0_stateless/{02670_map_literal_cast.reference => 02583_map_literal_cast.reference} (100%) rename tests/queries/0_stateless/{02670_map_literal_cast.sql => 02583_map_literal_cast.sql} (100%) rename tests/queries/0_stateless/{02674_range_ipv4.reference => 02584_range_ipv4.reference} (100%) rename tests/queries/0_stateless/{02674_range_ipv4.sql => 02584_range_ipv4.sql} (100%) create mode 100644 tests/queries/0_stateless/02585_query_status_deadlock.reference create mode 100755 tests/queries/0_stateless/02585_query_status_deadlock.sh diff --git a/tests/queries/0_stateless/25337_width_bucket.reference b/tests/queries/0_stateless/02581_width_bucket.reference similarity index 100% rename from tests/queries/0_stateless/25337_width_bucket.reference rename to tests/queries/0_stateless/02581_width_bucket.reference diff --git a/tests/queries/0_stateless/25337_width_bucket.sql b/tests/queries/0_stateless/02581_width_bucket.sql similarity index 100% rename from tests/queries/0_stateless/25337_width_bucket.sql rename to tests/queries/0_stateless/02581_width_bucket.sql diff --git a/tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.reference b/tests/queries/0_stateless/02582_analyzer_join_subquery_empty_column_list.reference similarity index 100% rename from tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.reference rename to tests/queries/0_stateless/02582_analyzer_join_subquery_empty_column_list.reference diff --git a/tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.sql b/tests/queries/0_stateless/02582_analyzer_join_subquery_empty_column_list.sql similarity index 100% rename from tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.sql rename to tests/queries/0_stateless/02582_analyzer_join_subquery_empty_column_list.sql diff --git a/tests/queries/0_stateless/02664_async_reading_with_small_limit.reference b/tests/queries/0_stateless/02582_async_reading_with_small_limit.reference similarity index 100% rename from tests/queries/0_stateless/02664_async_reading_with_small_limit.reference rename to tests/queries/0_stateless/02582_async_reading_with_small_limit.reference diff --git a/tests/queries/0_stateless/02664_async_reading_with_small_limit.sql b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql similarity index 100% rename from 
tests/queries/0_stateless/02664_async_reading_with_small_limit.sql rename to tests/queries/0_stateless/02582_async_reading_with_small_limit.sql diff --git a/tests/queries/0_stateless/02670_map_literal_cast.reference b/tests/queries/0_stateless/02583_map_literal_cast.reference similarity index 100% rename from tests/queries/0_stateless/02670_map_literal_cast.reference rename to tests/queries/0_stateless/02583_map_literal_cast.reference diff --git a/tests/queries/0_stateless/02670_map_literal_cast.sql b/tests/queries/0_stateless/02583_map_literal_cast.sql similarity index 100% rename from tests/queries/0_stateless/02670_map_literal_cast.sql rename to tests/queries/0_stateless/02583_map_literal_cast.sql diff --git a/tests/queries/0_stateless/02674_range_ipv4.reference b/tests/queries/0_stateless/02584_range_ipv4.reference similarity index 100% rename from tests/queries/0_stateless/02674_range_ipv4.reference rename to tests/queries/0_stateless/02584_range_ipv4.reference diff --git a/tests/queries/0_stateless/02674_range_ipv4.sql b/tests/queries/0_stateless/02584_range_ipv4.sql similarity index 100% rename from tests/queries/0_stateless/02674_range_ipv4.sql rename to tests/queries/0_stateless/02584_range_ipv4.sql diff --git a/tests/queries/0_stateless/02585_query_status_deadlock.reference b/tests/queries/0_stateless/02585_query_status_deadlock.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02585_query_status_deadlock.sh b/tests/queries/0_stateless/02585_query_status_deadlock.sh new file mode 100755 index 00000000000..92dd05ef46c --- /dev/null +++ b/tests/queries/0_stateless/02585_query_status_deadlock.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +QUERY_ID="${CLICKHOUSE_DATABASE}_test_02585_query_to_kill_id_1" + +$CLICKHOUSE_CLIENT --query_id="$QUERY_ID" -n -q " +create temporary table tmp as select * from numbers(500000000); +select * from remote('127.0.0.2', 'system.numbers_mt') where number in (select * from tmp);" &> /dev/null & + + +while true +do + res=$($CLICKHOUSE_CLIENT -q "select query, event_time from system.query_log where query_id = '$QUERY_ID' and query like 'select%' limit 1") + if [ -n "$res" ]; then + break + fi + sleep 1 +done + +$CLICKHOUSE_CLIENT -q "kill query where query_id = '$QUERY_ID' sync" &> /dev/null + From 93861e04ae941d2b20b9b6f79b73e19bacec74ab Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 2 Mar 2023 20:48:25 +0000 Subject: [PATCH 175/333] Fix fix fix --- tests/ci/compatibility_check.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 688fe883c1e..a06f4baf359 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -60,18 +60,20 @@ def process_glibc_check(log_path: str, max_glibc_version: str) -> TestResults: def process_result( - result_folder: str, server_log_folder: str, max_glibc_version: str + result_folder: str, server_log_folder: str, check_glibc: bool, check_distributions: bool, max_glibc_version: str ) -> Tuple[str, str, TestResults, List[str]]: glibc_log_path = os.path.join(result_folder, "glibc.log") test_results = process_glibc_check(glibc_log_path, max_glibc_version) status = "success" description = "Compatibility check passed" - if len(test_results) > 1 or test_results[0].status != "OK": - status = "failure" - description = "glibc check failed" - if status == "success": + if check_glibc: + if len(test_results) > 1 or test_results[0].status != "OK": + status = "failure" + description = "glibc check failed" + + if status == "success" and check_distributions: for operating_system in ("ubuntu:12.04", "centos:5"): test_result = process_os_check( os.path.join(result_folder, operating_system) @@ -217,7 +219,7 @@ def main(): s3_helper = S3Helper() state, description, test_results, additional_logs = process_result( - result_path, server_log_path, max_glibc_version + result_path, server_log_path, args.check_glibc, args.check_distributions, max_glibc_version ) ch_helper = ClickHouseHelper() From 6eb2877e532aeb083357dcd4a0d7477e4de357b8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 2 Mar 2023 20:54:45 +0000 Subject: [PATCH 176/333] Automatic style fix --- tests/ci/compatibility_check.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index a06f4baf359..8841bedbb18 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -60,7 +60,11 @@ def process_glibc_check(log_path: str, max_glibc_version: str) -> TestResults: def process_result( - result_folder: str, server_log_folder: str, check_glibc: bool, check_distributions: bool, max_glibc_version: str + result_folder: str, + server_log_folder: str, + check_glibc: bool, + check_distributions: bool, + max_glibc_version: str, ) -> Tuple[str, str, TestResults, List[str]]: glibc_log_path = os.path.join(result_folder, "glibc.log") test_results = process_glibc_check(glibc_log_path, max_glibc_version) @@ -219,7 +223,11 @@ def main(): s3_helper = S3Helper() state, description, test_results, additional_logs = process_result( - result_path, server_log_path, 
args.check_glibc, args.check_distributions, max_glibc_version + result_path, + server_log_path, + args.check_glibc, + args.check_distributions, + max_glibc_version, ) ch_helper = ClickHouseHelper() From fc227d94bcaf9cd4d3583b1cb5c91c177b150cb8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 2 Mar 2023 21:20:26 +0000 Subject: [PATCH 177/333] 02346_full_text_search.sql: Add result separators to simplify analysis --- .../02346_full_text_search.reference | 9 +++++++++ .../0_stateless/02346_full_text_search.sql | 19 ++++++++++--------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02346_full_text_search.reference b/tests/queries/0_stateless/02346_full_text_search.reference index f1e21e511d0..9cd09110608 100644 --- a/tests/queries/0_stateless/02346_full_text_search.reference +++ b/tests/queries/0_stateless/02346_full_text_search.reference @@ -1,3 +1,4 @@ +Test inverted(2) af inverted 1 101 Alick a01 @@ -10,6 +11,7 @@ af inverted 113 Click b03 118 Click b08 1 +Test inverted() af inverted 101 Alick a01 106 Alick a06 @@ -21,9 +23,11 @@ af inverted 101 Alick a01 111 Alick b01 1 +Test on array columns af inverted 3 ['Click a03','Click b03'] 1 +Test on map columns af inverted 103 {'Click':'Click a03'} 108 {'Click':'Click a08'} @@ -32,20 +36,25 @@ af inverted 1 103 {'Click':'Click a03'} 1 +Test inverted(2) on a column with two parts af inverted 101 Alick a01 111 Alick b01 201 rick c01 1 +Test inverted(2) on UTF-8 data af inverted 102 clickhouse你好 1 +Test max_digestion_size_per_segment af inverted BC614E,05397FB1,6969696969898240,CF3304 1 +Test density==1 af inverted 1 1 +Test density==0.1 af inverted 1 1 diff --git a/tests/queries/0_stateless/02346_full_text_search.sql b/tests/queries/0_stateless/02346_full_text_search.sql index 2b10800e78f..ed086861f1f 100644 --- a/tests/queries/0_stateless/02346_full_text_search.sql +++ b/tests/queries/0_stateless/02346_full_text_search.sql @@ -2,7 +2,7 @@ SET allow_experimental_inverted_index = 1; SET log_queries = 1; ---------------------------------------------------- --- Test inverted(2) +SELECT 'Test inverted(2)'; DROP TABLE IF EXISTS tab; @@ -58,7 +58,7 @@ SELECT read_rows==8 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test inverted() +SELECT 'Test inverted()'; DROP TABLE IF EXISTS tab_x; @@ -111,7 +111,7 @@ SELECT read_rows==4 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test on array columns +SELECT 'Test on array columns'; DROP TABLE IF EXISTS tab; @@ -138,7 +138,7 @@ SELECT read_rows==2 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test on map columns +SELECT 'Test on map columns'; DROP TABLE IF EXISTS tab; @@ -178,7 +178,8 @@ SELECT read_rows==8 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test inverted(2) on a column with two parts +SELECT 'Test inverted(2) on a column with two parts'; + DROP TABLE IF EXISTS tab; @@ -206,7 +207,7 @@ SELECT read_rows==6 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test inverted(2) on UTF-8 data +SELECT 'Test inverted(2) on UTF-8 data'; DROP TABLE IF EXISTS tab; @@ -234,7 +235,7 @@ SELECT read_rows==2 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test max_digestion_size_per_segment +SELECT 'Test max_digestion_size_per_segment'; DROP TABLE IF EXISTS tab; @@ -265,7 +266,7 @@ SELECT read_rows==256 from system.query_log 
LIMIT 1; ---------------------------------------------------- --- Test density==1 +SELECT 'Test density==1'; DROP TABLE IF EXISTS tab; @@ -294,7 +295,7 @@ SELECT read_rows==0 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test density==0.1 +SELECT 'Test density==0.1'; DROP TABLE IF EXISTS tab; From 619b28206034120c7eb51b75bc6c919e8feed795 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 3 Mar 2023 01:22:04 +0100 Subject: [PATCH 178/333] more correct handling of fatal errors --- src/Common/CurrentThread.cpp | 4 ++-- src/Common/CurrentThread.h | 4 ++-- src/Common/ThreadStatus.cpp | 12 +++++++----- src/Daemon/BaseDaemon.cpp | 6 +++--- src/Interpreters/ThreadStatusExt.cpp | 7 +++++-- src/Server/GRPCServer.cpp | 3 +-- src/Server/TCPHandler.cpp | 13 +++++-------- 7 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/Common/CurrentThread.cpp b/src/Common/CurrentThread.cpp index 526e28c043d..cf4bd97490e 100644 --- a/src/Common/CurrentThread.cpp +++ b/src/Common/CurrentThread.cpp @@ -67,8 +67,8 @@ void CurrentThread::attachInternalTextLogsQueue(const std::shared_ptr callback) { - if (unlikely(!current_thread)) - return; + /// It does not make sense to set a callback for sending logs to a client if there's no thread status + chassert(current_thread); current_thread->setFatalErrorCallback(callback); } diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index f36b92e319d..ffc00c77504 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -100,8 +100,8 @@ public: /// Initializes query with current thread as master thread in constructor, and detaches it in destructor struct QueryScope : private boost::noncopyable { - explicit QueryScope(ContextMutablePtr query_context); - explicit QueryScope(ContextPtr query_context); + explicit QueryScope(ContextMutablePtr query_context, std::function fatal_error_callback = {}); + explicit QueryScope(ContextPtr query_context, std::function fatal_error_callback = {}); ~QueryScope(); void logPeakMemoryUsage(); diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 46c171b5cb6..18b33cd412a 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -226,17 +226,19 @@ void ThreadStatus::attachInternalProfileEventsQueue(const InternalProfileEventsQ void ThreadStatus::setFatalErrorCallback(std::function callback) { - fatal_error_callback = std::move(callback); - - if (!thread_group) - return; - + /// It does not make sense to set a callback for sending logs to a client if there's no thread group + chassert(thread_group); std::lock_guard lock(thread_group->mutex); + fatal_error_callback = std::move(callback); thread_group->fatal_error_callback = fatal_error_callback; } void ThreadStatus::onFatalError() { + /// No thread group - no callback + if (!thread_group) + return; + std::lock_guard lock(thread_group->mutex); if (fatal_error_callback) fatal_error_callback(); diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 60179fd5317..fbfa9e68774 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -134,7 +134,7 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *) } -static std::atomic fatal_error_printed{false}; +static std::atomic_flag fatal_error_printed; /** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log. 
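  * The fatal_error_printed flag above is an std::atomic_flag, presumably because std::atomic_flag is the only
  * atomic type whose operations are guaranteed to be lock-free, which makes it safe to poll with test() from
  * inside this signal handler.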
*/ @@ -165,7 +165,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context) for (size_t i = 0; i < 300; ++i) { /// We will synchronize with the thread printing the messages with an atomic variable to finish earlier. - if (fatal_error_printed) + if (fatal_error_printed.test()) break; /// This coarse method of synchronization is perfectly ok for fatal signals. @@ -421,7 +421,7 @@ private: if (thread_ptr) thread_ptr->onFatalError(); - fatal_error_printed = true; + fatal_error_printed.test_and_set(); } }; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 84400fc3711..b42f2671a5e 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -550,15 +550,16 @@ void CurrentThread::detachQueryIfNotDetached() } -CurrentThread::QueryScope::QueryScope(ContextMutablePtr query_context) +CurrentThread::QueryScope::QueryScope(ContextMutablePtr query_context, std::function fatal_error_callback) { CurrentThread::initializeQuery(); CurrentThread::attachQueryContext(query_context); if (!query_context->hasQueryContext()) query_context->makeQueryContext(); + setFatalErrorCallback(fatal_error_callback); } -CurrentThread::QueryScope::QueryScope(ContextPtr query_context) +CurrentThread::QueryScope::QueryScope(ContextPtr query_context, std::function fatal_error_callback) { if (!query_context->hasQueryContext()) throw Exception( @@ -566,6 +567,7 @@ CurrentThread::QueryScope::QueryScope(ContextPtr query_context) CurrentThread::initializeQuery(); CurrentThread::attachQueryContext(query_context); + setFatalErrorCallback(fatal_error_callback); } void CurrentThread::QueryScope::logPeakMemoryUsage() @@ -585,6 +587,7 @@ CurrentThread::QueryScope::~QueryScope() if (log_peak_memory_usage_in_destructor) logPeakMemoryUsage(); + setFatalErrorCallback({}); CurrentThread::detachQueryIfNotDetached(); } catch (...) diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 595f5a8c2b7..533c3d0959b 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -836,7 +836,7 @@ namespace query_context->applySettingsChanges(settings_changes); query_context->setCurrentQueryId(query_info.query_id()); - query_scope.emplace(query_context); + query_scope.emplace(query_context, /* fatal_error_callback */ [this]{ onFatalError(); }); /// Set up tracing context for this query on current thread thread_trace_context = std::make_unique("GRPCServer", @@ -854,7 +854,6 @@ namespace logs_queue->max_priority = Poco::Logger::parseLevel(client_logs_level.toString()); logs_queue->setSourceRegexp(settings.send_logs_source_regexp); CurrentThread::attachInternalTextLogsQueue(logs_queue, client_logs_level); - CurrentThread::setFatalErrorCallback([this]{ onFatalError(); }); } /// Set the current database if specified. diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a307b472a64..cac7160b070 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -277,7 +277,11 @@ void TCPHandler::runImpl() query_context->getSettingsRef(), query_context->getOpenTelemetrySpanLog()); - query_scope.emplace(query_context); + query_scope.emplace(query_context, /* fatal_error_callback */ [this] + { + std::lock_guard lock(fatal_error_mutex); + sendLogs(); + }); /// If query received, then settings in query_context has been updated. /// So it's better to update the connection settings for flexibility. 
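        /// Passing the fatal error callback through QueryScope also guarantees that it is installed together with
        /// the query scope and reset in ~QueryScope, so it does not have to be cleared manually after the query.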
@@ -298,11 +302,6 @@ void TCPHandler::runImpl() state.logs_queue->max_priority = Poco::Logger::parseLevel(client_logs_level.toString()); state.logs_queue->setSourceRegexp(query_context->getSettingsRef().send_logs_source_regexp); CurrentThread::attachInternalTextLogsQueue(state.logs_queue, client_logs_level); - CurrentThread::setFatalErrorCallback([this] - { - std::lock_guard lock(fatal_error_mutex); - sendLogs(); - }); } if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS) { @@ -611,8 +610,6 @@ void TCPHandler::runImpl() /// It is important to destroy query context here. We do not want it to live arbitrarily longer than the query. query_context.reset(); - CurrentThread::setFatalErrorCallback({}); - if (is_interserver_mode) { /// We don't really have session in interserver mode, new one is created for each query. It's better to reset it now. From 29e396cc30e219815e574105bb170505443a09bb Mon Sep 17 00:00:00 2001 From: houbaron Date: Fri, 3 Mar 2023 11:57:03 +0800 Subject: [PATCH 179/333] Update skipping-indexes.md --- docs/zh/guides/improving-query-performance/skipping-indexes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/guides/improving-query-performance/skipping-indexes.md b/docs/zh/guides/improving-query-performance/skipping-indexes.md index 2f9ce09d77f..f5889898c2c 100644 --- a/docs/zh/guides/improving-query-performance/skipping-indexes.md +++ b/docs/zh/guides/improving-query-performance/skipping-indexes.md @@ -1,6 +1,6 @@ --- slug: /zh/guides/improving-query-performance/skipping-indexes -sidebar_label: Data Skipping Indexes +sidebar_label: 跳数索引 sidebar_position: 2 --- From 1f7a0c70f0442feb1ea3ac1593a4d699893de6bc Mon Sep 17 00:00:00 2001 From: houbaron Date: Fri, 3 Mar 2023 12:00:42 +0800 Subject: [PATCH 180/333] Update sparse-primary-indexes.md --- .../improving-query-performance/sparse-primary-indexes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index e773a02fbc3..18b23a79f86 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -1,6 +1,6 @@ --- slug: /zh/guides/improving-query-performance/sparse-primary-indexes -sidebar_label: Sparse Primary Indexes +sidebar_label: 主键稀疏索引 sidebar_position: 20 --- From 38d157a850769ff88a040b1f802258f8a9d532c8 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 2 Mar 2023 17:50:26 +0000 Subject: [PATCH 181/333] Fix race in grace hash join with limit --- src/Interpreters/GraceHashJoin.cpp | 4 +++- src/Interpreters/GraceHashJoin.h | 2 +- .../25340_grace_hash_limit_race.reference | 0 .../0_stateless/25340_grace_hash_limit_race.sql | 15 +++++++++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/25340_grace_hash_limit_race.reference create mode 100644 tests/queries/0_stateless/25340_grace_hash_limit_race.sql diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 79a825a752f..1b62939ac7b 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -410,6 +410,8 @@ void GraceHashJoin::initialize(const Block & sample_block) void GraceHashJoin::joinBlock(Block & block, std::shared_ptr & not_processed) { + std::shared_lock current_bucket_lock(current_bucket_mutex); + if (block.rows() == 0) { 
hash_join->joinBlock(block, not_processed); @@ -549,7 +551,7 @@ public: IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() { - std::lock_guard current_bucket_lock(current_bucket_mutex); + std::unique_lock current_bucket_lock(current_bucket_mutex); if (current_bucket == nullptr) return nullptr; diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index 4f7694e2f07..3a0b45370ea 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -139,7 +139,7 @@ private: mutable SharedMutex rehash_mutex; FileBucket * current_bucket = nullptr; - mutable std::mutex current_bucket_mutex; + mutable SharedMutex current_bucket_mutex; InMemoryJoinPtr hash_join; Block hash_join_sample_block; diff --git a/tests/queries/0_stateless/25340_grace_hash_limit_race.reference b/tests/queries/0_stateless/25340_grace_hash_limit_race.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/25340_grace_hash_limit_race.sql b/tests/queries/0_stateless/25340_grace_hash_limit_race.sql new file mode 100644 index 00000000000..322c808c94d --- /dev/null +++ b/tests/queries/0_stateless/25340_grace_hash_limit_race.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test_grace_hash; + +CREATE TABLE test_grace_hash (id UInt32, value UInt64) ENGINE = MergeTree ORDER BY id; + +INSERT INTO test_grace_hash SELECT number, number % 100 = 0 FROM numbers(100000); + +SET join_algorithm = 'grace_hash'; + +SELECT * FROM ( + SELECT f.id FROM test_grace_hash AS f + LEFT JOIN test_grace_hash AS d + ON f.id = d.id + LIMIT 1000 +) FORMAT Null; + From f06910da98cfc4aafe3bc86876975a86ee3cc760 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 3 Mar 2023 00:47:57 +0300 Subject: [PATCH 182/333] Update 25340_grace_hash_limit_race.sql --- tests/queries/0_stateless/25340_grace_hash_limit_race.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/25340_grace_hash_limit_race.sql b/tests/queries/0_stateless/25340_grace_hash_limit_race.sql index 322c808c94d..b12dd7bdbba 100644 --- a/tests/queries/0_stateless/25340_grace_hash_limit_race.sql +++ b/tests/queries/0_stateless/25340_grace_hash_limit_race.sql @@ -13,3 +13,4 @@ SELECT * FROM ( LIMIT 1000 ) FORMAT Null; +DROP TABLE test_grace_hash; From db2ffc109ccae0118679896cf7c0e9a1f90aad35 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 3 Mar 2023 10:17:34 +0000 Subject: [PATCH 183/333] Add comment to GrachHashJoin::current_bucket_mutex --- src/Interpreters/GraceHashJoin.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index 3a0b45370ea..0d6c4741b95 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -139,6 +139,12 @@ private: mutable SharedMutex rehash_mutex; FileBucket * current_bucket = nullptr; + + /* Function `getDelayedBlocks` should be a critical section. + * Also some `joinBlock` calls may be in progress and we need to wait for them to finish, + * because they may may use `hash_join`, but `getDelayedBlocks` switches it to another bucket. + * So, `joinBlock` acquires shared lock and getDelayedBlocks acquires exclusive lock. 
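+     * In other words, `current_bucket_mutex` works as a readers-writer lock: concurrent `joinBlock` calls
+     * still run in parallel under the shared lock, and only the bucket switch in `getDelayedBlocks` is exclusive.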
+ */ mutable SharedMutex current_bucket_mutex; InMemoryJoinPtr hash_join; From 380e076e842fee30e4bd6b9729a36edbe623e61f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 3 Mar 2023 11:19:05 +0000 Subject: [PATCH 184/333] x86 --> amd64 --- .github/workflows/backport_branches.yml | 2 +- .github/workflows/master.yml | 2 +- .github/workflows/pull_request.yml | 2 +- .github/workflows/release_branches.yml | 2 +- tests/ci/ci_config.py | 2 +- tests/ci/compatibility_check.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index b14a32127c4..a324d20abc9 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -103,7 +103,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (x86)" --check-glibc --check-distributions + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions - name: Cleanup if: always() run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index e224d6cf5c3..b70fe256833 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -134,7 +134,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (x86)" --check-glibc --check-distributions + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions - name: Cleanup if: always() run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index ae6cb1d3a83..ff98739db00 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -198,7 +198,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (x86)" --check-glibc --check-distributions + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions - name: Cleanup if: always() run: | diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 229532efdab..74ec1163cc9 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -95,7 +95,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (x86)" --check-glibc --check-distributions + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions - name: Cleanup if: always() run: | diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 9df198430d2..2f35b337cb3 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -316,7 +316,7 @@ CI_CONFIG = { "Integration tests flaky check (asan)": { "required_build": "package_asan", }, - "Compatibility check (x86)": { + "Compatibility check (amd64)": { "required_build": "package_release", }, "Compatibility check (aarch64)": { diff --git a/tests/ci/compatibility_check.py 
b/tests/ci/compatibility_check.py index 8841bedbb18..432e9ec7c01 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -214,7 +214,7 @@ def main(): # See https://sourceware.org/glibc/wiki/Glibc%20Timeline max_glibc_version = "" - if "x86" in args.check_name: + if "amd64" in args.check_name: max_glibc_version = "2.4" elif "aarch64" in args.check_name: max_glibc_version = "2.18" # because of build with newer sysroot? From d34f00f4370fd548a739bda02c4d1902c464e945 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 3 Mar 2023 15:02:05 +0300 Subject: [PATCH 185/333] Update run.sh --- docker/test/upgrade/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index df32e2833e7..ce8a56c777e 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -162,7 +162,7 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "Cannot flush" \ -e "Container already exists" \ /var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "" > /test_output/upgrade_error_messages.txt \ - && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \ + && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \ >> /test_output/test_results.tsv \ || echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv From 56f02374f6dbbb2195279fdf9415a3275a5697c7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 3 Mar 2023 12:14:05 +0100 Subject: [PATCH 186/333] Remove stateless test because it makes unfixably 02344_show_caches flaky --- tests/queries/0_stateless/02344_show_caches.reference | 1 - .../02454_create_table_with_custom_disk.reference | 1 - .../02454_create_table_with_custom_disk.sql | 10 ---------- 3 files changed, 12 deletions(-) diff --git a/tests/queries/0_stateless/02344_show_caches.reference b/tests/queries/0_stateless/02344_show_caches.reference index b321319a309..2ee4f902ba1 100644 --- a/tests/queries/0_stateless/02344_show_caches.reference +++ b/tests/queries/0_stateless/02344_show_caches.reference @@ -10,6 +10,5 @@ local_cache s3_cache_6 s3_cache_small local_cache_2 -__tmp_internal_324081342946782869538999598488311137423 local_cache_3 s3_cache_multi_2 diff --git a/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference b/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference index a71d52b6f57..1d8610c59c9 100644 --- a/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference +++ b/tests/queries/0_stateless/02454_create_table_with_custom_disk.reference @@ -9,4 +9,3 @@ SETTINGS disk = disk(type = local, path = \'/var/lib/clickhouse/disks/local/\') CREATE TABLE default.test\n(\n `a` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS disk = disk(type = local, path = \'[HIDDEN]\'), index_granularity = 8192 a Int32 200 -CREATE TABLE default.test\n(\n `a` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS disk = disk(type = cache, max_size = \'[HIDDEN]\', path = \'[HIDDEN]\', disk = disk(type = local, path = \'[HIDDEN]\')), index_granularity = 8192 diff --git a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql index 4909f91ccb1..6cb1c0774aa 100644 --- a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql +++ 
b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql @@ -26,13 +26,3 @@ DESCRIBE TABLE test; INSERT INTO test SELECT number FROM numbers(100); SELECT count() FROM test; - -DROP TABLE test; - -CREATE TABLE test (a Int32) -ENGINE = MergeTree() order by tuple() -SETTINGS disk = disk(type=cache, max_size='1Gi', path='/var/lib/clickhouse/custom_disk_cache/', disk=disk(type=local, path='/var/lib/clickhouse/disks/local/')); - -SHOW CREATE TABLE test; - -DROP TABLE test; From bc8b34f74ba8176020ba65c6d8aed44f8f53afdd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 3 Mar 2023 13:16:46 +0100 Subject: [PATCH 187/333] adjust test test_seekable_formats --- tests/integration/test_storage_s3/test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 54944b56919..8b20727a7b5 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1056,13 +1056,13 @@ def test_seekable_formats(started_cluster): table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')" exec_query_with_retry( instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1000000) settings s3_truncate_on_insert=1", + f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", ) result = instance.query( - f"SELECT count() FROM {table_function} SETTINGS max_memory_usage='50M'" + f"SELECT count() FROM {table_function} SETTINGS max_memory_usage='60M'" ) - assert int(result) == 1000000 + assert int(result) == 1500000 instance.query(f"SELECT * FROM {table_function} FORMAT Null") @@ -1073,7 +1073,7 @@ def test_seekable_formats(started_cluster): result = result.strip() assert result.endswith("MiB") result = result[: result.index(".")] - assert int(result) > 80 + assert int(result) > 150 def test_seekable_formats_url(started_cluster): @@ -1083,23 +1083,23 @@ def test_seekable_formats_url(started_cluster): table_function = f"s3(s3_parquet, structure='a Int32, b String', format='Parquet')" exec_query_with_retry( instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1000000) settings s3_truncate_on_insert=1", + f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", ) result = instance.query(f"SELECT count() FROM {table_function}") - assert int(result) == 1000000 + assert int(result) == 1500000 table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')" exec_query_with_retry( instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1000000) settings s3_truncate_on_insert=1", + f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", ) table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_parquet', 'Parquet', 'a Int32, b String')" result = instance.query( - f"SELECT count() FROM {table_function} SETTINGS max_memory_usage='50M'" + f"SELECT count() FROM {table_function} SETTINGS max_memory_usage='60M'" ) - assert int(result) == 1000000 + assert int(result) == 1500000 def test_empty_file(started_cluster): From e2278ca70c8945b17dce8ff9698905ca33f6f847 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 1 Mar 2023 20:59:56 +0100 Subject: [PATCH 188/333] Add code for autoscaling lambda --- tests/ci/autoscale_runners_lambda/app.py | 226 ++++++++++++++++++ .../build_and_deploy_archive.sh | 1 + .../autoscale_runners_lambda/requirements.txt | 1 + 3 files changed, 228 insertions(+) create mode 100644 tests/ci/autoscale_runners_lambda/app.py create mode 120000 tests/ci/autoscale_runners_lambda/build_and_deploy_archive.sh create mode 100644 tests/ci/autoscale_runners_lambda/requirements.txt diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py new file mode 100644 index 00000000000..2777d5cf9c1 --- /dev/null +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python3 + +"""The lambda to decrease/increase ASG desired capacity based on current queue""" + +import json +import logging +import time +from dataclasses import dataclass +from pprint import pformat +from typing import Any, List, Literal, Optional + +import boto3 # type: ignore +import requests # type: ignore + +RUNNER_TYPE_LABELS = [ + "builder", + "func-tester", + "func-tester-aarch64", + "fuzzer-unit-tester", + "stress-tester", + "style-checker", + "style-checker-aarch64", +] +QUEUE_QUERY = f"""SELECT + last_status AS status, + toUInt32(count()) AS length, + labels +FROM +( + SELECT + arraySort(groupArray(status))[-1] AS last_status, + labels, + id, + html_url + FROM default.workflow_jobs + WHERE has(labels, 'self-hosted') + AND hasAny({RUNNER_TYPE_LABELS}, labels) + AND started_at > now() - INTERVAL 2 DAY + GROUP BY ALL + HAVING last_status IN ('in_progress', 'queued') +) +GROUP BY ALL +ORDER BY labels, last_status""" + + +@dataclass +class Queue: + status: Literal["in_progress", "queued"] + lentgh: int + label: str + + +### VENDORING +def get_parameter_from_ssm(name, decrypt=True, client=None): + if not client: + client = boto3.client("ssm", region_name="us-east-1") + return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] + + +class CHException(Exception): + pass + + +class ClickHouseHelper: + def __init__( + self, + url: Optional[str] = None, + user: Optional[str] = None, + password: Optional[str] = None, + ): + self.url = url + self.auth = {} + if user: + self.auth["X-ClickHouse-User"] = user + if password: + self.auth["X-ClickHouse-Key"] = password + + def _select_and_get_json_each_row(self, db, query): + params = { + "database": db, + "query": query, + "default_format": "JSONEachRow", + } + for i in range(5): + response = None + try: + response = requests.get(self.url, params=params, headers=self.auth) + response.raise_for_status() + return response.text + except Exception as ex: + logging.warning("Cannot fetch data with exception %s", str(ex)) + if response: + logging.warning("Reponse text %s", response.text) + time.sleep(0.1 * i) + + raise CHException("Cannot fetch data from clickhouse") + + def select_json_each_row(self, db, query): + text = self._select_and_get_json_each_row(db, query) + result = [] + for line in text.split("\n"): + if line: + result.append(json.loads(line)) + return result + + +CH_CLIENT = ClickHouseHelper(get_parameter_from_ssm("clickhouse-test-stat-url"), "play") + + +def set_capacity( + runner_type: str, queues: List[Queue], client: Any, dry_run: bool = True +) -> None: + assert len(queues) in (1, 2) + assert all(q.label == runner_type for q in queues) + as_groups = client.describe_auto_scaling_groups( + Filters=[ + {"Name": "tag-key", "Values": ["github:runner-type"]}, + {"Name": 
"tag-value", "Values": [runner_type]}, + ] + )["AutoScalingGroups"] + assert len(as_groups) == 1 + asg = as_groups[0] + running = 0 + queued = 0 + for q in queues: + if q.status == "in_progress": + running = q.lentgh + continue + if q.status == "queued": + queued = q.lentgh + continue + raise ValueError("Queue status is not in ['in_progress', 'queued']") + + capacity_reserve = max(0, asg["DesiredCapacity"] - running) + stop = False + if queued: + # This part is about scaling up + # First, let's check if there's enough runners to cover the queue + stop = stop or (asg["DesiredCapacity"] - running - queued) > 0 + + stop = stop or asg["MaxSize"] <= asg["DesiredCapacity"] + # Let's calculate a new desired capacity. Here the scale is used to not + # scale up and down too quickly + desired_capacity = asg["DesiredCapacity"] + ((queued - capacity_reserve) // 5) + desired_capacity = max(desired_capacity, asg["MinSize"]) + desired_capacity = min(desired_capacity, asg["MaxSize"]) + # Finally, should the capacity be even changed + stop = stop or asg["DesiredCapacity"] == desired_capacity + if stop: + return + logging.info( + "The ASG %s capacity will be increased to %s, current capacity=%s, " + "maximum capacity=%s, running jobs=%s, queue size=%s", + asg["AutoScalingGroupName"], + desired_capacity, + asg["DesiredCapacity"], + asg["MaxSize"], + running, + queued, + ) + if not dry_run: + client.set_desired_capacity( + AutoScalingGroupName=asg["AutoScalingGroupName"], + DesiredCapacity=desired_capacity, + ) + return + + # Now we will calculate if we need to scale down + stop = stop or asg["DesiredCapacity"] <= asg["MinSize"] + stop = stop or asg["DesiredCapacity"] <= running + # Scale down quicker than scale up + desired_capacity = asg["DesiredCapacity"] - (capacity_reserve // 3) + desired_capacity = max(desired_capacity, asg["MinSize"]) + desired_capacity = min(desired_capacity, asg["MaxSize"]) + stop = stop or asg["DesiredCapacity"] == desired_capacity + if stop: + return + + logging.info( + "The ASG %s capacity will be decreased to %s, current capacity=%s, " + "minimum capacity=%s, running jobs=%s, queue size=%s", + asg["AutoScalingGroupName"], + desired_capacity, + asg["DesiredCapacity"], + asg["MinSize"], + running, + queued, + ) + if not dry_run: + client.set_desired_capacity( + AutoScalingGroupName=asg["AutoScalingGroupName"], + DesiredCapacity=desired_capacity, + ) + + +def main(dry_run: bool = True) -> None: + logging.getLogger().setLevel(logging.INFO) + asg_client = boto3.client("autoscaling") + try: + global CH_CLIENT + queues = CH_CLIENT.select_json_each_row("default", QUEUE_QUERY) + except CHException as ex: + logging.exception( + "Got an exception on insert, tryuing to update the client " + "credentials and repeat", + exc_info=ex, + ) + CH_CLIENT = ClickHouseHelper( + get_parameter_from_ssm("clickhouse-test-stat-url"), "play" + ) + queues = CH_CLIENT.select_json_each_row("default", QUEUE_QUERY) + + logging.info("Received queue data:\n%s", pformat(queues, width=120)) + for runner_type in RUNNER_TYPE_LABELS: + runner_queues = [ + Queue(queue["status"], queue["length"], runner_type) + for queue in queues + if runner_type in queue["labels"] + ] + set_capacity(runner_type, runner_queues, asg_client, dry_run) + + +def handler(event: dict, context: Any) -> None: + _ = event + _ = context + return main(False) diff --git a/tests/ci/autoscale_runners_lambda/build_and_deploy_archive.sh b/tests/ci/autoscale_runners_lambda/build_and_deploy_archive.sh new file mode 120000 index 00000000000..96ba3fa024e 
--- /dev/null +++ b/tests/ci/autoscale_runners_lambda/build_and_deploy_archive.sh @@ -0,0 +1 @@ +../team_keys_lambda/build_and_deploy_archive.sh \ No newline at end of file diff --git a/tests/ci/autoscale_runners_lambda/requirements.txt b/tests/ci/autoscale_runners_lambda/requirements.txt new file mode 100644 index 00000000000..f2293605cf1 --- /dev/null +++ b/tests/ci/autoscale_runners_lambda/requirements.txt @@ -0,0 +1 @@ +requests From 0483de0e04af22ae9a51b1feceef86810efd2cda Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 2 Mar 2023 13:53:56 +0100 Subject: [PATCH 189/333] Reorder `status` in default.workflow_jobs --- tests/ci/workflow_jobs_lambda/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/workflow_jobs_lambda/app.py b/tests/ci/workflow_jobs_lambda/app.py index f6589576806..9436e01ad53 100644 --- a/tests/ci/workflow_jobs_lambda/app.py +++ b/tests/ci/workflow_jobs_lambda/app.py @@ -213,7 +213,7 @@ def send_event_workflow_job(workflow_job: WorkflowJob) -> None: # `head_sha` String, # `url` String, # `html_url` String, - # `status` Enum8('queued' = 1, 'in_progress' = 2, 'completed' = 3, 'waiting' = 4), + # `status` Enum8('waiting' = 1, 'queued' = 2, 'in_progress' = 3, 'completed' = 4), # `conclusion` LowCardinality(String), # `started_at` DateTime, # `completed_at` DateTime, From a5869e25f644551b00c034bb25efa56d4ed111d3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 2 Mar 2023 20:21:40 +0100 Subject: [PATCH 190/333] Add tests for autoscale_runners_lambda --- tests/ci/autoscale_runners_lambda_test.py | 133 ++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 tests/ci/autoscale_runners_lambda_test.py diff --git a/tests/ci/autoscale_runners_lambda_test.py b/tests/ci/autoscale_runners_lambda_test.py new file mode 100644 index 00000000000..e6ac157ee4a --- /dev/null +++ b/tests/ci/autoscale_runners_lambda_test.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python + +import unittest +from dataclasses import dataclass +from typing import Any, List + +from autoscale_runners_lambda.app import set_capacity, Queue + + +@dataclass +class TestCase: + name: str + min_size: int + desired_capacity: int + max_size: int + queues: List[Queue] + expected_capacity: int + + +class TestSetCapacity(unittest.TestCase): + class FakeClient: + def __init__(self): + self._expected_data = {} # type: dict + self._expected_capacity = -1 + + @property + def expected_data(self) -> dict: + """a one-time property""" + data, self._expected_data = self._expected_data, {} + return data + + @expected_data.setter + def expected_data(self, value: dict) -> None: + self._expected_data = value + + @property + def expected_capacity(self) -> int: + """one-time property""" + capacity, self._expected_capacity = self._expected_capacity, -1 + return capacity + + def describe_auto_scaling_groups(self, **kwargs: Any) -> dict: + _ = kwargs + return self.expected_data + + def set_desired_capacity(self, **kwargs: Any) -> None: + self._expected_capacity = kwargs["DesiredCapacity"] + + def data_helper( + self, name: str, min_size: int, desired_capacity: int, max_size: int + ) -> None: + self.expected_data = { + "AutoScalingGroups": [ + { + "AutoScalingGroupName": name, + "DesiredCapacity": desired_capacity, + "MinSize": min_size, + "MaxSize": max_size, + } + ] + } + + def setUp(self): + self.client = self.FakeClient() + + def test_normal_cases(self): + test_cases = ( + # Do not change capacity + TestCase("noqueue", 1, 13, 20, [Queue("in_progress", 155, "noqueue")], 
-1), + TestCase("w/reserve", 1, 13, 20, [Queue("queued", 5, "w/reserve")], -1), + TestCase("w/reserve", 1, 13, 20, [Queue("queued", 17, "w/reserve")], -1), + TestCase("w/reserve", 1, 23, 20, [Queue("queued", 17, "w/reserve")], -1), + TestCase("less-min", 10, 3, 20, [Queue("queued", 1, "less-min")], -1), + # Increase capacity + TestCase("increase", 1, 13, 20, [Queue("queued", 23, "increase")], 15), + TestCase("increase", 1, 13, 20, [Queue("queued", 18, "increase")], 14), + TestCase("increase", 1, 13, 20, [Queue("queued", 183, "increase")], 20), + TestCase( + "increase-w/o reserve", + 1, + 13, + 20, + [ + Queue("in_progress", 11, "increase-w/o reserve"), + Queue("queued", 12, "increase-w/o reserve"), + ], + 15, + ), + TestCase("lower-min", 10, 5, 20, [Queue("queued", 5, "lower-min")], 10), + # Decrease capacity + TestCase("decrease", 1, 13, 20, [Queue("in_progress", 3, "decrease")], 10), + TestCase("decrease", 1, 13, 20, [Queue("in_progress", 5, "decrease")], 11), + ) + for t in test_cases: + self.client.data_helper(t.name, t.min_size, t.desired_capacity, t.max_size) + set_capacity(t.name, t.queues, self.client, False) + self.assertEqual(t.expected_capacity, self.client.expected_capacity, t.name) + + def test_exceptions(self): + test_cases = ( + ( + TestCase( + "different names", + 1, + 1, + 1, + [Queue("queued", 5, "another name")], + -1, + ), + AssertionError, + ), + (TestCase("wrong queue len", 1, 1, 1, [], -1), AssertionError), + ( + TestCase( + "wrong queue", 1, 1, 1, [Queue("wrong", 1, "wrong queue")], -1 # type: ignore + ), + ValueError, + ), + ) + for t, error in test_cases: + with self.assertRaises(error): + self.client.data_helper( + t.name, t.min_size, t.desired_capacity, t.max_size + ) + set_capacity(t.name, t.queues, self.client, False) + + with self.assertRaises(AssertionError): + self.client.expected_data = {"AutoScalingGroups": [1, 2]} + set_capacity( + "wrong number of ASGs", + [Queue("queued", 1, "wrong number of ASGs")], + self.client, + ) From 4e9c2462648c265646cd79055837a73ecd6b7631 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 2 Mar 2023 20:28:27 +0100 Subject: [PATCH 191/333] Check CI python code always --- .github/workflows/pull_request.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 7d410f833c5..1efa6147c9a 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -37,7 +37,6 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 run_check.py PythonUnitTests: - needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - name: Check out repository code From c1774cabd01cea086c96547e7fffd35a3a2282b2 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 2 Mar 2023 20:29:34 +0100 Subject: [PATCH 192/333] Do not install GitPython on runners, it is preinstalled --- .github/workflows/cherry_pick.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/cherry_pick.yml b/.github/workflows/cherry_pick.yml index 065e584182b..8d1e2055978 100644 --- a/.github/workflows/cherry_pick.yml +++ b/.github/workflows/cherry_pick.yml @@ -35,7 +35,6 @@ jobs: fetch-depth: 0 - name: Cherry pick run: | - sudo pip install GitPython cd "$GITHUB_WORKSPACE/tests/ci" python3 cherry_pick.py - name: Cleanup From 6a746beff8586ebbe496089b8da4dff044b5657a Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 3 Mar 2023 09:53:21 +0100 Subject: [PATCH 193/333] Tune style-checker scaling up and down by modifiers --- tests/ci/autoscale_runners_lambda/app.py | 26 ++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index 2777d5cf9c1..25c6c2eb9a8 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -7,7 +7,7 @@ import logging import time from dataclasses import dataclass from pprint import pformat -from typing import Any, List, Literal, Optional +from typing import Any, List, Literal, Optional, Tuple import boto3 # type: ignore import requests # type: ignore @@ -50,6 +50,19 @@ class Queue: label: str +def get_scales(runner_type: str) -> Tuple[int, int]: + "returns the multipliers for scaling down and up ASG by types" + # Scaling down is quicker on the lack of running jobs than scaling up on + # queue + scale_down = 3 + scale_up = 5 + if runner_type == "style-checker": + # the style checkers have so many noise, so it scales up too quickly + scale_down = 2 + scale_up = 10 + return scale_down, scale_up + + ### VENDORING def get_parameter_from_ssm(name, decrypt=True, client=None): if not client: @@ -131,6 +144,7 @@ def set_capacity( continue raise ValueError("Queue status is not in ['in_progress', 'queued']") + scale_down, scale_up = get_scales(runner_type) capacity_reserve = max(0, asg["DesiredCapacity"] - running) stop = False if queued: @@ -139,9 +153,10 @@ def set_capacity( stop = stop or (asg["DesiredCapacity"] - running - queued) > 0 stop = stop or asg["MaxSize"] <= asg["DesiredCapacity"] - # Let's calculate a new desired capacity. Here the scale is used to not - # scale up and down too quickly - desired_capacity = asg["DesiredCapacity"] + ((queued - capacity_reserve) // 5) + # Let's calculate a new desired capacity + desired_capacity = asg["DesiredCapacity"] + ( + (queued - capacity_reserve) // scale_up + ) desired_capacity = max(desired_capacity, asg["MinSize"]) desired_capacity = min(desired_capacity, asg["MaxSize"]) # Finally, should the capacity be even changed @@ -168,8 +183,7 @@ def set_capacity( # Now we will calculate if we need to scale down stop = stop or asg["DesiredCapacity"] <= asg["MinSize"] stop = stop or asg["DesiredCapacity"] <= running - # Scale down quicker than scale up - desired_capacity = asg["DesiredCapacity"] - (capacity_reserve // 3) + desired_capacity = asg["DesiredCapacity"] - (capacity_reserve // scale_down) desired_capacity = max(desired_capacity, asg["MinSize"]) desired_capacity = min(desired_capacity, asg["MaxSize"]) stop = stop or asg["DesiredCapacity"] == desired_capacity From a5764643604dff138150ad7719b7c0781ea75593 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 3 Mar 2023 12:01:05 +0100 Subject: [PATCH 194/333] Reduce statisctics interval, process empty response for runners --- tests/ci/autoscale_runners_lambda/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index 25c6c2eb9a8..ca173be2eab 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -35,7 +35,7 @@ FROM FROM default.workflow_jobs WHERE has(labels, 'self-hosted') AND hasAny({RUNNER_TYPE_LABELS}, labels) - AND started_at > now() - INTERVAL 2 DAY + AND started_at > now() - INTERVAL 1 DAY GROUP BY ALL HAVING last_status IN ('in_progress', 'queued') ) @@ -231,6 +231,7 @@ def main(dry_run: bool = True) -> None: for queue in queues if runner_type in queue["labels"] ] + runner_queues = runner_queues or [Queue("in_progress", 0, runner_type)] set_capacity(runner_type, runner_queues, asg_client, dry_run) From 602933ce1edd9f02ec35b47ef428a49685fa123c Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 3 Mar 2023 12:09:09 +0100 Subject: [PATCH 195/333] Make smarter scaler based on reserve/deficit --- tests/ci/autoscale_runners_lambda/app.py | 19 +++++++++---------- tests/ci/autoscale_runners_lambda_test.py | 5 ++--- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index ca173be2eab..596e675ee24 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -145,18 +145,18 @@ def set_capacity( raise ValueError("Queue status is not in ['in_progress', 'queued']") scale_down, scale_up = get_scales(runner_type) - capacity_reserve = max(0, asg["DesiredCapacity"] - running) + # How much nodes are free (positive) or need to be added (negative) + capacity_reserve = asg["DesiredCapacity"] - running - queued stop = False - if queued: + if capacity_reserve < 0: # This part is about scaling up - # First, let's check if there's enough runners to cover the queue - stop = stop or (asg["DesiredCapacity"] - running - queued) > 0 - + capacity_deficit = -capacity_reserve + # It looks that we are still OK, since no queued jobs exist + stop = stop or queued == 0 + # Are we already at the capacity limits stop = stop or asg["MaxSize"] <= asg["DesiredCapacity"] # Let's calculate a new desired capacity - desired_capacity = asg["DesiredCapacity"] + ( - (queued - capacity_reserve) // scale_up - ) + desired_capacity = asg["DesiredCapacity"] + (capacity_deficit // scale_up) desired_capacity = max(desired_capacity, asg["MinSize"]) desired_capacity = min(desired_capacity, asg["MaxSize"]) # Finally, should the capacity be even changed @@ -181,8 +181,7 @@ def set_capacity( return # Now we will calculate if we need to scale down - stop = stop or asg["DesiredCapacity"] <= asg["MinSize"] - stop = stop or asg["DesiredCapacity"] <= running + stop = stop or asg["DesiredCapacity"] == asg["MinSize"] desired_capacity = asg["DesiredCapacity"] - (capacity_reserve // scale_down) desired_capacity = max(desired_capacity, asg["MinSize"]) desired_capacity = min(desired_capacity, asg["MaxSize"]) diff --git a/tests/ci/autoscale_runners_lambda_test.py b/tests/ci/autoscale_runners_lambda_test.py index e6ac157ee4a..7efa0004745 100644 --- a/tests/ci/autoscale_runners_lambda_test.py +++ b/tests/ci/autoscale_runners_lambda_test.py @@ -67,10 +67,7 @@ class TestSetCapacity(unittest.TestCase): test_cases = ( # Do not change capacity 
TestCase("noqueue", 1, 13, 20, [Queue("in_progress", 155, "noqueue")], -1), - TestCase("w/reserve", 1, 13, 20, [Queue("queued", 5, "w/reserve")], -1), TestCase("w/reserve", 1, 13, 20, [Queue("queued", 17, "w/reserve")], -1), - TestCase("w/reserve", 1, 23, 20, [Queue("queued", 17, "w/reserve")], -1), - TestCase("less-min", 10, 3, 20, [Queue("queued", 1, "less-min")], -1), # Increase capacity TestCase("increase", 1, 13, 20, [Queue("queued", 23, "increase")], 15), TestCase("increase", 1, 13, 20, [Queue("queued", 18, "increase")], 14), @@ -88,6 +85,8 @@ class TestSetCapacity(unittest.TestCase): ), TestCase("lower-min", 10, 5, 20, [Queue("queued", 5, "lower-min")], 10), # Decrease capacity + TestCase("w/reserve", 1, 13, 20, [Queue("queued", 5, "w/reserve")], 11), + TestCase("w/reserve", 1, 23, 20, [Queue("queued", 17, "w/reserve")], 20), TestCase("decrease", 1, 13, 20, [Queue("in_progress", 3, "decrease")], 10), TestCase("decrease", 1, 13, 20, [Queue("in_progress", 5, "decrease")], 11), ) From 1bbf5acd47b86e19f6767619d372466bca28bec2 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Fri, 3 Mar 2023 15:55:52 +0300 Subject: [PATCH 196/333] Pass headers from StorageURL to WriteBufferFromHTTP (#46996) * Pass headers from StorageURL to WriteBufferFromHTTP * Add a test * Lint * `time.sleep(1)` * Start echo server earlier * Add proper handling for mock server start * Automatic style fix --------- Co-authored-by: robot-clickhouse --- src/IO/WriteBufferFromHTTP.cpp | 4 ++ src/IO/WriteBufferFromHTTP.h | 2 + src/Storages/StorageURL.cpp | 11 +++- src/Storages/StorageURL.h | 1 + .../test_storage_url_http_headers/__init__.py | 0 .../http_headers_echo_server.py | 31 +++++++++ .../test_storage_url_http_headers/test.py | 66 +++++++++++++++++++ 7 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 tests/integration/test_storage_url_http_headers/__init__.py create mode 100644 tests/integration/test_storage_url_http_headers/http_headers_echo_server.py create mode 100644 tests/integration/test_storage_url_http_headers/test.py diff --git a/src/IO/WriteBufferFromHTTP.cpp b/src/IO/WriteBufferFromHTTP.cpp index f7456ad6b6c..355c42a23c9 100644 --- a/src/IO/WriteBufferFromHTTP.cpp +++ b/src/IO/WriteBufferFromHTTP.cpp @@ -11,6 +11,7 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( const std::string & method, const std::string & content_type, const std::string & content_encoding, + const HTTPHeaderEntries & additional_headers, const ConnectionTimeouts & timeouts, size_t buffer_size_) : WriteBufferFromOStream(buffer_size_) @@ -28,6 +29,9 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( if (!content_encoding.empty()) request.set("Content-Encoding", content_encoding); + for (const auto & header: additional_headers) + request.add(header.name, header.value); + LOG_TRACE((&Poco::Logger::get("WriteBufferToHTTP")), "Sending request to {}", uri.toString()); ostr = &session->sendRequest(request); diff --git a/src/IO/WriteBufferFromHTTP.h b/src/IO/WriteBufferFromHTTP.h index 6966bc8a5c5..ce5020dfa78 100644 --- a/src/IO/WriteBufferFromHTTP.h +++ b/src/IO/WriteBufferFromHTTP.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ public: const std::string & method = Poco::Net::HTTPRequest::HTTP_POST, // POST or PUT only const std::string & content_type = "", const std::string & content_encoding = "", + const HTTPHeaderEntries & additional_headers = {}, const ConnectionTimeouts & timeouts = {}, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); diff --git 
a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 152dda8f360..c0ddb0bc48a 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -458,6 +459,7 @@ StorageURLSink::StorageURLSink( ContextPtr context, const ConnectionTimeouts & timeouts, const CompressionMethod compression_method, + const HTTPHeaderEntries & headers, const String & http_method) : SinkToStorage(sample_block) { @@ -465,7 +467,7 @@ StorageURLSink::StorageURLSink( std::string content_encoding = toContentEncodingName(compression_method); write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique(Poco::URI(uri), http_method, content_type, content_encoding, timeouts), + std::make_unique(Poco::URI(uri), http_method, content_type, content_encoding, headers, timeouts), compression_method, 3); writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, format_settings); @@ -530,6 +532,7 @@ public: ContextPtr context_, const ConnectionTimeouts & timeouts_, const CompressionMethod compression_method_, + const HTTPHeaderEntries & headers_, const String & http_method_) : PartitionedSink(partition_by, context_, sample_block_) , uri(uri_) @@ -539,6 +542,7 @@ public: , context(context_) , timeouts(timeouts_) , compression_method(compression_method_) + , headers(headers_) , http_method(http_method_) { } @@ -548,7 +552,7 @@ public: auto partition_path = PartitionedSink::replaceWildcards(uri, partition_id); context->getRemoteHostFilter().checkURL(Poco::URI(partition_path)); return std::make_shared( - partition_path, format, format_settings, sample_block, context, timeouts, compression_method, http_method); + partition_path, format, format_settings, sample_block, context, timeouts, compression_method, headers, http_method); } private: @@ -560,6 +564,7 @@ private: const ConnectionTimeouts timeouts; const CompressionMethod compression_method; + const HTTPHeaderEntries headers; const String http_method; }; @@ -821,6 +826,7 @@ SinkToStoragePtr IStorageURLBase::write(const ASTPtr & query, const StorageMetad context, getHTTPTimeouts(context), compression_method, + headers, http_method); } else @@ -833,6 +839,7 @@ SinkToStoragePtr IStorageURLBase::write(const ASTPtr & query, const StorageMetad context, getHTTPTimeouts(context), compression_method, + headers, http_method); } } diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index acf49f3cb71..1cfffc3e73a 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -137,6 +137,7 @@ public: ContextPtr context, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, + const HTTPHeaderEntries & headers = {}, const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } diff --git a/tests/integration/test_storage_url_http_headers/__init__.py b/tests/integration/test_storage_url_http_headers/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_url_http_headers/http_headers_echo_server.py b/tests/integration/test_storage_url_http_headers/http_headers_echo_server.py new file mode 100644 index 00000000000..b1a3f6777b1 --- /dev/null +++ b/tests/integration/test_storage_url_http_headers/http_headers_echo_server.py @@ -0,0 +1,31 @@ +import http.server + +RESULT_PATH = "/headers.txt" + + +class RequestHandler(http.server.BaseHTTPRequestHandler): + def log_message(self, *args): + with 
open(RESULT_PATH, "w") as f: + f.write(self.headers.as_string()) + + def do_POST(self): + self.rfile.read1() + self.send_response(200) + self.end_headers() + self.wfile.write(b'{"status":"ok"}') + + +if __name__ == "__main__": + with open(RESULT_PATH, "w") as f: + f.write("") + httpd = http.server.HTTPServer( + ( + "localhost", + 8000, + ), + RequestHandler, + ) + try: + httpd.serve_forever() + finally: + httpd.server_close() diff --git a/tests/integration/test_storage_url_http_headers/test.py b/tests/integration/test_storage_url_http_headers/test.py new file mode 100644 index 00000000000..3bbf5ec81c9 --- /dev/null +++ b/tests/integration/test_storage_url_http_headers/test.py @@ -0,0 +1,66 @@ +import pytest +import os +import time + +from . import http_headers_echo_server + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +server = cluster.add_instance("node") + + +def run_echo_server(): + script_dir = os.path.dirname(os.path.realpath(__file__)) + + server.copy_file_to_container( + os.path.join(script_dir, "http_headers_echo_server.py"), + "/http_headers_echo_server.py", + ) + + server.exec_in_container( + [ + "bash", + "-c", + "python3 /http_headers_echo_server.py > /http_headers_echo.server.log 2>&1", + ], + detach=True, + user="root", + ) + + for _ in range(0, 10): + ping_response = server.exec_in_container( + ["curl", "-s", f"http://localhost:8000/"], + nothrow=True, + ) + + if "html" in ping_response: + return + + print(ping_response) + + raise Exception("Echo server is not responding") + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + run_echo_server() + yield cluster + finally: + cluster.shutdown() + + +def test_storage_url_http_headers(started_cluster): + query = "INSERT INTO TABLE FUNCTION url('http://localhost:8000/', JSON, 'a UInt64', headers('X-My-Custom-Header'='test-header')) VALUES (1)" + + server.query(query) + + result = server.exec_in_container( + ["cat", http_headers_echo_server.RESULT_PATH], user="root" + ) + + print(result) + + assert "X-My-Custom-Header: test-header" in result From 6c9225063674f01285f98a05382e9a80aee74f97 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 3 Mar 2023 08:21:02 -0500 Subject: [PATCH 197/333] add compression types for file function --- docs/en/sql-reference/table-functions/file.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index d2ef66dde73..594c328c3ff 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -6,21 +6,22 @@ sidebar_label: file # file -Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones. +Creates a table from a file. This table function is similar to [url](/docs/en/sql-reference/table-functions/url.md) and [hdfs](/docs/en/sql-reference/table-functions/hdfs.md) ones. -`file` function can be used in `SELECT` and `INSERT` queries on data in [File](../../engines/table-engines/special/file.md) tables. +`file` function can be used in `SELECT` and `INSERT` queries on data in [File](/docs/en/engines/table-engines/special/file.md) tables. 
**Syntax** ``` sql -file(path [,format] [,structure]) +file(path [,format] [,structure] [,compression]) ``` **Parameters** -- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings. -- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings. +- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`. **Returned value** @@ -53,7 +54,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U └─────────┴─────────┴─────────┘ ``` -Getting the first 10 lines of a table that contains 3 columns of [UInt32](../../sql-reference/data-types/int-uint.md) type from a CSV file: +Getting the first 10 lines of a table that contains 3 columns of [UInt32](/docs/en/sql-reference/data-types/int-uint.md) type from a CSV file: ``` sql SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10; @@ -143,4 +144,4 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 **See Also** -- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns) +- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns) From 221ac4a1e57fa124f81a677a3b8630518bcbe4bd Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 3 Mar 2023 13:23:44 +0000 Subject: [PATCH 198/333] Better --- src/Interpreters/ProcessList.cpp | 49 ++++++++++++-------------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 52674dc1c77..6f5553f58fd 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -362,7 +362,9 @@ QueryStatus::QueryStatus( QueryStatus::~QueryStatus() { - assert(executors.empty()); + /// Check that all executors were invalidated. + for (const auto & e : executors) + assert(!e->executor); if (auto * memory_tracker = getMemoryTracker()) { @@ -394,35 +396,21 @@ CancellationCode QueryStatus::cancelQuery(bool) is_killed.store(true); std::unique_lock lock(executors_mutex); - - /// Track all cancelled executors. - std::unordered_set cancelled; - /// We cancel executors from the left to the right, so if the last executor - /// was cancelled, then all executors were cancelled. - while (!cancelled.contains(executors.back().get())) + for (const auto & e : executors) { - size_t size = executors.size(); - /// We should create a copy of executor holder, because it can be - /// removed from vector in removePipelineExecutor from another thread - /// and reference will be invalid. 
- for (auto e : executors) - { - if (cancelled.contains(e.get())) - continue; - /// We should call cancel() with unlocked executors_mutex, because - /// cancel() can try to lock some internal mutex that is already locked by query executing - /// thread, and query executing thread can call removePipelineExecutor and lock executors_mutex, - /// which will lead to deadlock. - lock.unlock(); - e->cancel(); - lock.lock(); - cancelled.insert(e.get()); - /// While executors_mutex was unlocked, removePipelineExecutor could be called and - /// the size of executors could have changed. In this case we should start iterating - /// over it again to avoid using invalid iterators. - if (executors.size() != size) - break; - } + /// We should call cancel() with unlocked executors_mutex, because + /// cancel() can try to lock some internal mutex that is already locked by query executing + /// thread, and query executing thread can call removePipelineExecutor and lock executors_mutex, + /// which will lead to deadlock. + /// Note that the size and the content of executors cannot be changed while + /// executors_mutex is unlocked, because: + /// 1) We don't allow adding new executors while cancelling query in addPipelineExecutor + /// 2) We don't actually remove executor holder from executors in removePipelineExecutor, + /// just mark that executor is invalid. + /// So, it's safe to continue iteration over executors after subsequent mutex locking. + lock.unlock(); + e->cancel(); + lock.lock(); } return CancellationCode::CancelSent; @@ -446,9 +434,8 @@ void QueryStatus::removePipelineExecutor(PipelineExecutor * e) std::lock_guard lock(executors_mutex); auto it = std::find_if(executors.begin(), executors.end(), [e](const ExecutorHolderPtr & x){ return x->executor == e; }); assert(it != executors.end()); - /// Invalidate executor pointer inside holder. 
+ /// Invalidate executor pointer inside holder, but don't remove holder from the executors (to avoid race with cancelQuery) (*it)->remove(); - executors.erase(it); } bool QueryStatus::checkTimeLimit() From e44e5281182c2cda7aef51bdabff529d337d1f3a Mon Sep 17 00:00:00 2001 From: Suzy Wang Date: Fri, 3 Mar 2023 06:20:18 -0800 Subject: [PATCH 199/333] remove unnecessary reverse memcpy --- src/Common/formatIPv6.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index bc8f70f047c..7b88f93750b 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -82,11 +82,7 @@ inline bool parseIPv4(T * &src, EOFfunction eof, unsigned char * dst, int32_t fi break; } - if constexpr (std::endian::native == std::endian::little) - memcpy(dst, &result, sizeof(result)); - else - reverseMemcpy(dst, &result, sizeof(result)); - + memcpy(dst, &result, sizeof(result)); return true; } From a8ceab136635a9238a431dea5d651c9731a10e6a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Fri, 3 Mar 2023 09:24:58 -0500 Subject: [PATCH 200/333] NOLINT for getenv --- utils/self-extracting-executable/decompressor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp index 39fe552dfac..8db0d95e70d 100644 --- a/utils/self-extracting-executable/decompressor.cpp +++ b/utils/self-extracting-executable/decompressor.cpp @@ -170,7 +170,7 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n bool isSudo() { - return getuid() == 0 && geteuid() == 0 && getenv("SUDO_USER") && getenv("SUDO_UID") && getenv("SUDO_GID"); + return getuid() == 0 && geteuid() == 0 && getenv("SUDO_USER") && getenv("SUDO_UID") && getenv("SUDO_GID"); // NOLINT(concurrency-mt-unsafe) } /// Read data about files and decomrpess them. 
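The following patch reworks custom-key parallel replicas: `parallel_replicas_custom_key` becomes a plain per-query String setting, and the explicit `parallel_replicas_mode` setting is removed in favour of deriving the mode in `Context::getParallelReplicasMode()`. A usage sketch modelled on the updated `02535_max_parallel_replicas_custom_key` test further down; the cluster, table, and key expression are the ones that test uses.

``` sql
SELECT y, count()
FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)
GROUP BY y
ORDER BY y
SETTINGS max_parallel_replicas = 3,
         parallel_replicas_custom_key = 'cityHash64(y)',
         parallel_replicas_custom_key_filter_type = 'default',
         prefer_localhost_replica = 0;
```

With the `default` filter type, each of the three replicas is treated as a virtual shard that reads only the rows where `cityHash64(y) % 3` equals its replica index, so the combined result should match a single-replica run — the 334/333/333 counts in the updated reference file.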
From 737cf8e14934f7b4ce4869241fb67e461143bc45 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 3 Mar 2023 15:14:49 +0000 Subject: [PATCH 201/333] Better --- src/Client/HedgedConnections.cpp | 4 +- src/Client/MultiplexedConnections.cpp | 4 +- src/Core/Settings.h | 3 +- src/Core/SettingsEnums.cpp | 5 - src/Core/SettingsEnums.h | 9 - src/Interpreters/Cluster.cpp | 2 +- src/Interpreters/Context.cpp | 23 ++- src/Interpreters/Context.h | 9 + src/Interpreters/InterpreterSelectQuery.cpp | 45 +++-- .../getCustomKeyFilterForParallelReplicas.cpp | 49 ++--- .../getCustomKeyFilterForParallelReplicas.h | 7 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 5 +- src/Storages/SelectQueryInfo.h | 2 + src/Storages/StorageDistributed.cpp | 55 +----- .../test_parallel_replicas_custom_key/test.py | 74 ++------ ...4_shard_distributed_with_many_replicas.sql | 2 +- ...here_max_parallel_replicas_distributed.sql | 5 +- .../01034_sample_final_distributed.sql | 5 +- .../01557_max_parallel_replicas_no_sample.sql | 3 +- .../02221_parallel_replicas_bug.sh | 2 +- ...arallel_reading_from_replicas_benchmark.sh | 1 - .../02404_memory_bound_merging.sql | 3 +- ...max_parallel_replicas_custom_key.reference | 175 +++++++++++------- .../02535_max_parallel_replicas_custom_key.sh | 12 +- 24 files changed, 222 insertions(+), 282 deletions(-) diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index b1fa6886e84..b97f9454fa5 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -175,9 +175,7 @@ void HedgedConnections::sendQuery( modified_settings.group_by_two_level_threshold_bytes = 0; } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 - && (settings.parallel_replicas_mode != ParallelReplicasMode::READ_TASKS - || !settings.allow_experimental_parallel_reading_from_replicas); + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; if (offset_states.size() > 1 && enable_sample_offset_parallel_processing) { diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index ec6788105ca..cc260353339 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -141,9 +141,7 @@ void MultiplexedConnections::sendQuery( } } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 - && (settings.parallel_replicas_mode != ParallelReplicasMode::READ_TASKS - || !settings.allow_experimental_parallel_reading_from_replicas); + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; size_t num_replicas = replica_states.size(); if (num_replicas > 1) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 83aa6104996..797f57f1c04 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -147,9 +147,8 @@ class IColumn; M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \ M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. 
This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ - M(Map, parallel_replicas_custom_key, "", "Custom key for parallel replicas using modulo operation on the key for assigning work to replicas.", 0) \ + M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ - M(ParallelReplicasMode, parallel_replicas_mode, ParallelReplicasMode::SAMPLE_KEY, "How to process query using multiple replicas.", 0) \ \ M(String, cluster_for_parallel_replicas, "default", "Cluster for a shard in which current server is located", 0) \ M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind on MergeTree table.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index d5bd6cb2ae6..b04757ecaa2 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -167,11 +167,6 @@ IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, {"kusto", Dialect::kusto}}) -IMPLEMENT_SETTING_ENUM(ParallelReplicasMode, ErrorCodes::BAD_ARGUMENTS, - {{"sample_key", ParallelReplicasMode::SAMPLE_KEY}, - {"custom_key", ParallelReplicasMode::CUSTOM_KEY}, - {"read_tasks", ParallelReplicasMode::READ_TASKS}}) - IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS, {{"default", ParallelReplicasCustomKeyFilterType::DEFAULT}, {"range", ParallelReplicasCustomKeyFilterType::RANGE}}) diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 5bb36cec60d..ae9456cc6d7 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -203,15 +203,6 @@ enum class Dialect DECLARE_SETTING_ENUM(Dialect) -enum class ParallelReplicasMode : uint8_t -{ - SAMPLE_KEY, - CUSTOM_KEY, - READ_TASKS, -}; - -DECLARE_SETTING_ENUM(ParallelReplicasMode) - enum class ParallelReplicasCustomKeyFilterType : uint8_t { DEFAULT, diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 1975fa29686..b419dacd523 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -752,7 +752,7 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti auto shuffled_replicas = replicas; // shuffle replicas so we don't always pick the same subset shuffleReplicas(shuffled_replicas, settings); - create_shards_from_replicas(std::span{shuffled_replicas.begin(), shuffled_replicas.begin() + max_replicas_from_shard}); + create_shards_from_replicas(std::span{shuffled_replicas.begin(), max_replicas_from_shard}); } } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 08a3cf206c5..90f9c3bbac4 100644 --- 
a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4056,23 +4056,34 @@ std::shared_ptr Context::getAsyncReadCounters() const return async_read_counters; } +Context::ParallelReplicasMode Context::getParallelReplicasMode() const +{ + const auto & settings = getSettingsRef(); + + using enum Context::ParallelReplicasMode; + if (!settings.parallel_replicas_custom_key.value.empty()) + return CUSTOM_KEY; + + if (settings.allow_experimental_parallel_reading_from_replicas + && !settings.use_hedged_requests) + return READ_TASKS; + + return SAMPLE_KEY; +} + bool Context::canUseParallelReplicasOnInitiator() const { const auto & settings = getSettingsRef(); - return settings.allow_experimental_parallel_reading_from_replicas - && settings.parallel_replicas_mode == ParallelReplicasMode::READ_TASKS + return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings.max_parallel_replicas > 1 - && !settings.use_hedged_requests && !getClientInfo().collaborate_with_initiator; } bool Context::canUseParallelReplicasOnFollower() const { const auto & settings = getSettingsRef(); - return settings.allow_experimental_parallel_reading_from_replicas - && settings.parallel_replicas_mode == ParallelReplicasMode::READ_TASKS + return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings.max_parallel_replicas > 1 - && !settings.use_hedged_requests && getClientInfo().collaborate_with_initiator; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 19bb6868331..67594a41459 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1123,6 +1123,15 @@ public: bool canUseParallelReplicasOnInitiator() const; bool canUseParallelReplicasOnFollower() const; + enum class ParallelReplicasMode : uint8_t + { + SAMPLE_KEY, + CUSTOM_KEY, + READ_TASKS, + }; + + ParallelReplicasMode getParallelReplicasMode() const; + private: std::unique_lock getLock() const; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index e84df90c449..c53734d1e9b 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -517,29 +517,38 @@ InterpreterSelectQuery::InterpreterSelectQuery( settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); ASTPtr parallel_replicas_custom_filter_ast = nullptr; - if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY && !joined_tables.tablesWithColumns().empty()) + if (context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY && !joined_tables.tablesWithColumns().empty()) { - if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, joined_tables.tablesWithColumns().front().table, *context)) + if (settings.parallel_replicas_count > 1) { - LOG_INFO(log, "Processing query on a replica using custom_key"); - if (!storage) - throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Storage is unknown when trying to parse custom key for parallel replica"); + if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *context)) + { + LOG_INFO(log, "Processing query on a replica using custom_key"); + if (!storage) + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Storage is unknown when trying to parse custom key for parallel replica"); - parallel_replicas_custom_filter_ast = getCustomKeyFilterForParallelReplica( - settings.parallel_replicas_count, - 
settings.parallel_replica_offset, - std::move(custom_key_ast), - settings.parallel_replicas_custom_key_filter_type, - *storage, - context); + parallel_replicas_custom_filter_ast = getCustomKeyFilterForParallelReplica( + settings.parallel_replicas_count, + settings.parallel_replica_offset, + std::move(custom_key_ast), + settings.parallel_replicas_custom_key_filter_type, + *storage, + context); + } + else if (settings.parallel_replica_offset > 0) + { + LOG_DEBUG( + log, + "Will use no data on this replica because parallel replicas processing with custom_key has been requested" + " (setting 'max_parallel_replicas') but the table does not have custom_key defined for it or it's invalid (settings `parallel_replicas_custom_key`)"); + parallel_replicas_custom_filter_ast = std::make_shared(false); + } } - else if (settings.parallel_replica_offset > 0) + else if (auto * distributed = dynamic_cast(storage.get()); + distributed && canUseCustomKey(settings, *distributed->getCluster(), *context)) { - LOG_DEBUG( - log, - "Will use no data on this replica because parallel replicas processing with custom_key has been requested" - " (setting 'max_parallel_replicas') but the table does not have custom_key defined for it (settings `parallel_replicas_custom_key`)"); - parallel_replicas_custom_filter_ast = std::make_shared(false); + query_info.use_custom_key = true; + context->setSetting("distributed_group_by_no_merge", 2); } } diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index 229668ceff4..2a32d450497 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -20,6 +20,12 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } +bool canUseCustomKey(const Settings & settings, const Cluster & cluster, const Context & context) +{ + return settings.max_parallel_replicas > 1 && context.getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY + && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; +} + ASTPtr getCustomKeyFilterForParallelReplica( size_t replicas_count, size_t replica_num, @@ -115,43 +121,14 @@ ASTPtr getCustomKeyFilterForParallelReplica( return makeASTFunction("and", std::move(lower_function), std::move(upper_function)); } -ASTPtr parseCustomKeyForTable(const Map & custom_keys, const DatabaseAndTableWithAlias & target, const Context & context) +ASTPtr parseCustomKeyForTable(const String & custom_key, const Context & context) { - for (size_t i = 0; i < custom_keys.size(); ++i) - { - const auto & tuple = custom_keys[i].safeGet(); - auto & table = tuple.at(0).safeGet(); - auto & filter = tuple.at(1).safeGet(); - - if (table == target.alias || - (table == target.table && context.getCurrentDatabase() == target.database) || - (table == target.database + '.' 
+ target.table)) - { - /// Try to parse expression - ParserExpression parser; - const auto & settings = context.getSettingsRef(); - return parseQuery( - parser, filter.data(), filter.data() + filter.size(), - "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); - } - } - - return nullptr; -} - -bool containsCustomKeyForTable(const Map & custom_keys, const DatabaseAndTableWithAlias & target, const Context & context) -{ - for (size_t i = 0; i < custom_keys.size(); ++i) - { - const auto & tuple = custom_keys[i].safeGet(); - auto & table = tuple.at(0).safeGet(); - - if (table == target.alias || - (table == target.table && context.getCurrentDatabase() == target.database) || - (table == target.database + '.' + target.table)) - return true; - } - return false; + /// Try to parse expression + ParserExpression parser; + const auto & settings = context.getSettingsRef(); + return parseQuery( + parser, custom_key.data(), custom_key.data() + custom_key.size(), + "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); } } diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h index fc515132487..543f1889b32 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -9,6 +10,8 @@ namespace DB { +bool canUseCustomKey(const Settings & settings, const Cluster & cluster, const Context & context); + /// Get AST for filter created from custom_key /// replica_num is the number of the replica for which we are generating filter starting from 0 ASTPtr getCustomKeyFilterForParallelReplica( @@ -19,8 +22,6 @@ ASTPtr getCustomKeyFilterForParallelReplica( const IStorage & storage, const ContextPtr & context); -ASTPtr parseCustomKeyForTable(const Map & custom_keys, const DatabaseAndTableWithAlias & target, const Context & context); - -bool containsCustomKeyForTable(const Map & custom_keys, const DatabaseAndTableWithAlias & target, const Context & context); +ASTPtr parseCustomKeyForTable(const String & custom_keys, const Context & context); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 7e914486326..07da66e4378 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -588,9 +588,10 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( * It is also important that the entire universe can be covered using SAMPLE 0.1 OFFSET 0, ... OFFSET 0.9 and similar decimals. */ + auto parallel_replicas_mode = context->getParallelReplicasMode(); /// Parallel replicas has been requested but there is no way to sample data. /// Select all data from first replica and no data from other replicas. 
- if (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::SAMPLE_KEY + if (settings.parallel_replicas_count > 1 && parallel_replicas_mode == Context::ParallelReplicasMode::SAMPLE_KEY && !data.supportsSampling() && settings.parallel_replica_offset > 0) { LOG_DEBUG( @@ -602,7 +603,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( } sampling.use_sampling = relative_sample_size > 0 - || (settings.parallel_replicas_count > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::SAMPLE_KEY + || (settings.parallel_replicas_count > 1 && parallel_replicas_mode == Context::ParallelReplicasMode::SAMPLE_KEY && data.supportsSampling()); bool no_data = false; /// There is nothing left after sampling. diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 7a3452f87d4..e3996950e79 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -207,6 +207,8 @@ struct SelectQueryInfo /// /// Configured in StorageDistributed::getQueryProcessingStage() ClusterPtr optimized_cluster; + /// should we use custom key with the cluster + bool use_custom_key = false; mutable ParallelReplicasReadingCoordinatorPtr coordinator; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b088a008b46..259caaf394a 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -264,12 +264,6 @@ size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & clus return (num_remote_shards + num_local_shards) * settings.max_parallel_replicas; } -bool canUseCustomKey(const Settings & settings, const Cluster & cluster) -{ - return settings.max_parallel_replicas > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY - && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; -} - } /// For destruction of std::unique_ptr of type that is incomplete in class definition. 
@@ -412,36 +406,9 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( size_t nodes = getClusterQueriedNodes(settings, cluster); - const auto use_virtual_shards = [&] - { - if (!canUseCustomKey(settings, *cluster)) - return false; - - auto distributed_table = DatabaseAndTableWithAlias( - *getTableExpression(query_info.query->as(), 0), local_context->getCurrentDatabase()); - - if (containsCustomKeyForTable(settings.parallel_replicas_custom_key, distributed_table, *local_context)) - { - LOG_INFO(log, "Found custom_key for {}", distributed_table.getQualifiedNamePrefix(false)); - return true; - } - - DatabaseAndTableWithAlias remote_table_info; - remote_table_info.database = remote_database; - remote_table_info.table = remote_table; - if (containsCustomKeyForTable(settings.parallel_replicas_custom_key, remote_table_info, *local_context)) - { - LOG_INFO(log, "Found custom_key for {}", remote_table_info.getQualifiedNamePrefix(false)); - return true; - } - - return false; - }; - - if (use_virtual_shards()) + if (query_info.use_custom_key) { LOG_INFO(log, "Single shard cluster used with custom_key, transforming replicas into virtual shards"); - query_info.cluster = cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas); } else @@ -816,25 +783,9 @@ void StorageDistributed::read( auto settings = local_context->getSettingsRef(); ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; - if (canUseCustomKey(settings, *getCluster())) + if (query_info.use_custom_key) { - const auto get_custom_key_ast = [&]() -> ASTPtr - { - auto distributed_table = DatabaseAndTableWithAlias( - *getTableExpression(query_info.query->as(), 0), local_context->getCurrentDatabase()); - if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, distributed_table, *local_context)) - return custom_key_ast; - - DatabaseAndTableWithAlias remote_table_info; - remote_table_info.database = remote_database; - remote_table_info.table = remote_table; - if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, remote_table_info, *local_context)) - return custom_key_ast; - - return nullptr; - }; - - if (auto custom_key_ast = get_custom_key_ast()) + if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *local_context)) { if (query_info.getCluster()->getShardCount() == 1) { diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index 9222f417a94..342abdcb088 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -44,11 +44,11 @@ def create_tables(cluster): def insert_data(cluster, row_num): create_tables(cluster) n1 = nodes[0] - n1.query(f"INSERT INTO dist_table SELECT number, number FROM numbers({row_num})") + n1.query(f"INSERT INTO dist_table SELECT number % 4, number FROM numbers({row_num})") n1.query("SYSTEM FLUSH DISTRIBUTED dist_table") -@pytest.mark.parametrize("custom_key", ["sipHash64(value)", "key"]) +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) @pytest.mark.parametrize("filter_type", ["default", "range"]) @pytest.mark.parametrize( "cluster", @@ -61,21 +61,22 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter row_num = 1000 insert_data(cluster, row_num) + expected_result = "" + for i in range(4): + expected_result += f"{i}\t250\n" + n1 = nodes[0] 
assert ( - int( - n1.query( - "SELECT count() FROM dist_table", - settings={ - "prefer_localhost_replica": 0, - "max_parallel_replicas": 4, - "parallel_replicas_mode": "custom_key", - "parallel_replicas_custom_key": f"{{'test_table': '{custom_key}'}}", - "parallel_replicas_custom_key_filter_type": filter_type, - }, - ) + n1.query( + "SELECT key, count() FROM dist_table GROUP BY key ORDER BY key", + settings={ + "prefer_localhost_replica": 0, + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + }, ) - == row_num + == expected_result ) if cluster == "test_multiple_shards_multiple_replicas": @@ -89,48 +90,3 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter assert n1.contains_in_log( "Single shard cluster used with custom_key, transforming replicas into virtual shards" ) - - -def test_custom_key_different_table_names(start_cluster): - def run(table_source, table_name_for_custom_key, should_use_virtual_shard=True): - for node in nodes: - node.rotate_logs() - - row_num = 1000 - insert_data("test_single_shard_multiple_replicas", row_num) - - n1 = nodes[0] - assert ( - int( - n1.query( - f"SELECT count() FROM {table_source}", - settings={ - "prefer_localhost_replica": 0, - "max_parallel_replicas": 4, - "parallel_replicas_mode": "custom_key", - "parallel_replicas_custom_key": f"{{'{table_name_for_custom_key}': 'sipHash64(value)'}}", - }, - ) - ) - == row_num - ) - - # we first transform all replicas into shards and then append for each shard filter - assert not should_use_virtual_shard or n1.contains_in_log( - "Single shard cluster used with custom_key, transforming replicas into virtual shards" - ) - - run("dist_table", "dist_table") - run("dist_table as d", "d") - run("dist_table as d", "dist_table") - run("dist_table as d", "test_table") - run( - "cluster('test_single_shard_multiple_replicas', default.test_table)", - "test_table", - ) - run("cluster('test_single_shard_multiple_replicas', default.test_table) as d", "d") - run( - "cluster('test_single_shard_multiple_replicas', default.test_table) as d", - "test_table", - ) - run("dist_table as d", "dist", should_use_virtual_shard=False) diff --git a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql index 795551e5dfa..901b818cbc0 100644 --- a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql +++ b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql @@ -1,7 +1,7 @@ -- Tags: replica, distributed +SET allow_experimental_parallel_reading_from_replicas = 0; SET max_parallel_replicas = 2; -SET parallel_replicas_mode = 'sample_key'; DROP TABLE IF EXISTS report; set allow_deprecated_syntax_for_merge_tree=1; diff --git a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql index 96d52d7e60e..86c84427297 100644 --- a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql +++ b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql @@ -1,5 +1,7 @@ -- Tags: replica, distributed +set allow_experimental_parallel_reading_from_replicas=0; + drop table if exists test_max_parallel_replicas_lr; -- If you wonder why the table is named with "_lr" suffix in this test. 
@@ -9,9 +11,6 @@ CREATE TABLE test_max_parallel_replicas_lr (timestamp UInt64) ENGINE = MergeTree INSERT INTO test_max_parallel_replicas_lr select number as timestamp from system.numbers limit 100; SET max_parallel_replicas = 2; -SET parallel_replicas_mode = 'sample_key'; -SET allow_experimental_parallel_reading_from_replicas = 0; - select count() FROM remote('127.0.0.{2|3}', currentDatabase(), test_max_parallel_replicas_lr) PREWHERE timestamp > 0; drop table test_max_parallel_replicas_lr; diff --git a/tests/queries/0_stateless/01034_sample_final_distributed.sql b/tests/queries/0_stateless/01034_sample_final_distributed.sql index 1b1832f7a0d..a81fef645db 100644 --- a/tests/queries/0_stateless/01034_sample_final_distributed.sql +++ b/tests/queries/0_stateless/01034_sample_final_distributed.sql @@ -1,5 +1,7 @@ -- Tags: distributed +set allow_experimental_parallel_reading_from_replicas = 0; + drop table if exists sample_final; create table sample_final (CounterID UInt32, EventDate Date, EventTime DateTime, UserID UInt64, Sign Int8) engine = CollapsingMergeTree(Sign) order by (CounterID, EventDate, intHash32(UserID), EventTime) sample by intHash32(UserID); insert into sample_final select number / (8192 * 4), toDate('2019-01-01'), toDateTime('2019-01-01 00:00:01') + number, number / (8192 * 2), number % 3 = 1 ? -1 : 1 from numbers(1000000); @@ -13,10 +15,7 @@ select count() from sample_final sample 1/2; select 'count sample final'; select count() from sample_final final sample 1/2; select 'count final max_parallel_replicas'; - -set allow_experimental_parallel_reading_from_replicas = 0; set max_parallel_replicas=2; -set parallel_replicas_mode='sample_key'; select count() from remote('127.0.0.{2|3}', currentDatabase(), sample_final) final; drop table if exists sample_final; diff --git a/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql b/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql index c44c335700f..04777f5b31c 100644 --- a/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql +++ b/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql @@ -1,10 +1,11 @@ -- Tags: replica +SET allow_experimental_parallel_reading_from_replicas=0; + DROP TABLE IF EXISTS t; CREATE TABLE t (x String) ENGINE = MergeTree ORDER BY x; INSERT INTO t VALUES ('Hello'); -SET parallel_replicas_mode = 'sample_key'; SET max_parallel_replicas = 3; SELECT * FROM remote('127.0.0.{2|3|4}', currentDatabase(), t); diff --git a/tests/queries/0_stateless/02221_parallel_replicas_bug.sh b/tests/queries/0_stateless/02221_parallel_replicas_bug.sh index 1a703b20d2a..cce32bf8272 100755 --- a/tests/queries/0_stateless/02221_parallel_replicas_bug.sh +++ b/tests/queries/0_stateless/02221_parallel_replicas_bug.sh @@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_mode='read_tasks' -nm < "$CURDIR"/01099_parallel_distributed_insert_select.sql > /dev/null +${CLICKHOUSE_CLIENT} --allow_experimental_parallel_reading_from_replicas=1 -nm < "$CURDIR"/01099_parallel_distributed_insert_select.sql > /dev/null diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh index 283c51cf0a2..2a163746e20 100755 --- a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh +++ b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh @@ -15,7 +15,6 @@ as select * from numbers(1); # Logical error: 'Coordinator for parallel reading from replicas is not initialized'. opts=( --allow_experimental_parallel_reading_from_replicas 1 - --parallel_replicas_mode 'read_tasks' --max_parallel_replicas 3 --iterations 1 diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql index 6e6bc5a3b6c..b6299de9aae 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.sql +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, long, no-random-merge-tree-settings +-- Tags: no-parallel, no-random-merge-tree-settings drop table if exists pr_t; drop table if exists dist_t_different_dbs; @@ -61,7 +61,6 @@ create table pr_t(a UInt64, b UInt64) engine=MergeTree order by a; insert into pr_t select number % 1000, number % 1000 from numbers_mt(1e6); set allow_experimental_parallel_reading_from_replicas = 1; -set parallel_replicas_mode = 'read_tasks'; set max_parallel_replicas = 3; set use_hedged_requests = 0; set cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost'; diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference index 21ed26b7579..9e4e24235aa 100644 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference @@ -23,103 +23,150 @@ filter_type='range' max_replicas=2 prefer_localhost_replica=1 Hello filter_type='range' max_replicas=3 prefer_localhost_replica=1 Hello -query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='cityHash64(x)' +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='y' filter_type='default' max_replicas=1 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=2 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=3 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=1 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=2 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=3 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=1 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=2 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=3 prefer_localhost_replica=1 -1000 +0 334 
+1 333 +2 333 filter_type='range' max_replicas=1 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=2 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=3 prefer_localhost_replica=1 -1000 -query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='y' +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' filter_type='default' max_replicas=1 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=2 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=3 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=1 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=2 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=3 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=1 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=2 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=3 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=1 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=2 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=3 prefer_localhost_replica=1 -1000 -query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='cityHash64(x) + y' +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' filter_type='default' max_replicas=1 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=2 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=3 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=1 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=2 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=3 prefer_localhost_replica=0 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=1 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=2 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='default' max_replicas=3 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=1 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=2 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 filter_type='range' max_replicas=3 prefer_localhost_replica=1 -1000 -query='SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='cityHash64(x) + 1' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -1000 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -1000 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -1000 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -1000 -filter_type='range' 
max_replicas=2 prefer_localhost_replica=0 -1000 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -1000 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -1000 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -1000 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -1000 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -1000 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -1000 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -1000 +0 334 +1 333 +2 333 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh index b6e5c853dc2..314251be874 100755 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh @@ -12,8 +12,7 @@ function run_with_custom_key { for max_replicas in {1..3}; do echo "filter_type='$filter_type' max_replicas=$max_replicas prefer_localhost_replica=$prefer_localhost_replica" query="$1 SETTINGS max_parallel_replicas=$max_replicas\ - , parallel_replicas_mode='custom_key'\ - , parallel_replicas_custom_key={'02535_custom_key': '$2'}\ + , parallel_replicas_custom_key='$2'\ , parallel_replicas_custom_key_filter_type='$filter_type'\ , prefer_localhost_replica=$prefer_localhost_replica" $CLICKHOUSE_CLIENT --query="$query" @@ -32,15 +31,14 @@ run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas $CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" $CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String, y Int32) ENGINE = MergeTree ORDER BY cityHash64(x)" -$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key SELECT toString(number), number FROM numbers(1000)" +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key SELECT toString(number), number % 3 FROM numbers(1000)" function run_count_with_custom_key { - run_with_custom_key "SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)" "$1" + run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y" "$1" } -run_count_with_custom_key "cityHash64(x)" run_count_with_custom_key "y" -run_count_with_custom_key "cityHash64(x) + y" -run_count_with_custom_key "cityHash64(x) + 1" +run_count_with_custom_key "cityHash64(y)" +run_count_with_custom_key "cityHash64(y) + 1" $CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" From 46667d43d88d2b30acdae30b9316df385ec20369 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 3 Mar 2023 15:25:25 +0000 Subject: [PATCH 202/333] Automatic style fix --- tests/integration/test_parallel_replicas_custom_key/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index 342abdcb088..baac2661506 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -44,7 +44,9 @@ def create_tables(cluster): def insert_data(cluster, row_num): create_tables(cluster) n1 = nodes[0] - n1.query(f"INSERT INTO dist_table SELECT number % 4, number FROM numbers({row_num})") + n1.query( + f"INSERT INTO dist_table SELECT number % 4, number FROM numbers({row_num})" + ) n1.query("SYSTEM FLUSH DISTRIBUTED dist_table") From 
0f2ae721411afc2b3150bde3a4a12c5426118f6f Mon Sep 17 00:00:00 2001 From: AVMusorin Date: Thu, 2 Mar 2023 13:51:21 +0100 Subject: [PATCH 203/333] prohibit DEFAULT/EPHEMERAL/ALIAS in KafkaEngine --- .../table-engines/integrations/kafka.md | 4 ++ .../sql-reference/statements/create/table.md | 4 ++ src/Storages/ColumnsDescription.cpp | 9 ++++ src/Storages/ColumnsDescription.h | 3 ++ src/Storages/Kafka/StorageKafka.cpp | 5 ++ tests/integration/test_storage_kafka/test.py | 50 +++++++++++++++++++ 6 files changed, 75 insertions(+) diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index ef422632d3e..e6134043b8e 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -125,6 +125,10 @@ Groups are flexible and synced on the cluster. For instance, if you have 10 topi 2. Create a table with the desired structure. 3. Create a materialized view that converts data from the engine and puts it into a previously created table. +:::info +Kafka Engine doesn't support columns with default value of type `DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS`. If you need columns with any default type, they can be added at `MATERIALIZED VIEW` level. +::: + When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from Kafka and convert them to the required format using `SELECT`. One kafka table can have as many materialized views as you like, they do not read data from the kafka table directly, but receive new records (in blocks), this way you can write to several tables with different detail level (with grouping - aggregation and without). diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 9e66afba613..ba495b0eed5 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -121,6 +121,10 @@ If the data type and default expression are defined explicitly, this expression Default expressions may be defined as an arbitrary expression from table constants and columns. When creating and changing the table structure, it checks that expressions do not contain loops. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed. +:::info +Kafka Engine doesn't support columns with default value of type `DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS`. If you need columns with any default type, they can be added at `MATERIALIZED VIEW` level, see [Kafka Engine](../../../engines/table-engines/integrations/kafka.md#description). 
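A minimal sketch of what this restriction means in practice. Broker, topic, and consumer-group names are placeholders; the workaround mirrors the materialized-view pattern described in the Kafka engine docs.

``` sql
-- Rejected after this patch: an engine-level column carries a DEFAULT expression.
CREATE TABLE test.kafka_bad (a Int, b Int DEFAULT 0)
ENGINE = Kafka('kafka1:19092', 'events', 'group1', 'JSONEachRow');

-- Workaround: keep the Kafka table plain and add the defaulted column downstream.
CREATE TABLE test.kafka_queue (a Int)
ENGINE = Kafka('kafka1:19092', 'events', 'group1', 'JSONEachRow');

CREATE TABLE test.destination (a Int, b Int DEFAULT 0)
ENGINE = MergeTree ORDER BY a;

CREATE MATERIALIZED VIEW test.consumer TO test.destination
AS SELECT a FROM test.kafka_queue;
```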
+::: + ### DEFAULT `DEFAULT expr` diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index d401840eec7..fa39e304925 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -383,6 +383,15 @@ NamesAndTypesList ColumnsDescription::getEphemeral() const return ret; } +NamesAndTypesList ColumnsDescription::getWithDefaultExpression() const +{ + NamesAndTypesList ret; + for (const auto & col : columns) + if (col.default_desc.expression) + ret.emplace_back(col.name, col.type); + return ret; +} + NamesAndTypesList ColumnsDescription::getAll() const { NamesAndTypesList ret; diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 4f874f4b850..36109392ab6 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -132,6 +132,9 @@ public: NamesAndTypesList getInsertable() const; /// ordinary + ephemeral NamesAndTypesList getAliases() const; NamesAndTypesList getEphemeral() const; + // Columns with preset default expression. + // For example from `CREATE TABLE` statement + NamesAndTypesList getWithDefaultExpression() const; NamesAndTypesList getAllPhysical() const; /// ordinary + materialized. NamesAndTypesList getAll() const; /// ordinary + materialized + aliases + ephemeral /// Returns .size0/.null/... diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 7b97273d8af..2afdc0dda8a 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -959,6 +959,11 @@ void registerStorageKafka(StorageFactory & factory) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_poll_max_batch_size can not be lower than 1"); } + if (args.columns.getOrdinary() != args.columns.getAll() || !args.columns.getWithDefaultExpression().empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "KafkaEngine doesn't support DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS expressions for columns. " + "See https://clickhouse.com/docs/en/engines/table-engines/integrations/kafka/#configuration"); + } return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(kafka_settings), collection_name); }; diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 9f617369859..51952ac1eb7 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -285,6 +285,56 @@ def avro_confluent_message(schema_registry_client, value): # Tests +def test_kafka_prohibited_column_types(kafka_cluster): + def assert_returned_exception(e): + assert e.value.returncode == 36 + assert ( + "KafkaEngine doesn't support DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS expressions for columns." 
+ in str(e.value) + ) + + # check column with DEFAULT expression + with pytest.raises(QueryRuntimeException) as exception: + instance.query( + """ + CREATE TABLE test.kafka (a Int, b Int DEFAULT 0) + ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') + """ + ) + assert_returned_exception(exception) + + # check EPHEMERAL + with pytest.raises(QueryRuntimeException) as exception: + instance.query( + """ + CREATE TABLE test.kafka (a Int, b Int EPHEMERAL) + ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') + """ + ) + assert_returned_exception(exception) + + # check ALIAS + with pytest.raises(QueryRuntimeException) as exception: + instance.query( + """ + CREATE TABLE test.kafka (a Int, b String Alias toString(a)) + ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') + """ + ) + assert_returned_exception(exception) + + # check MATERIALIZED + # check ALIAS + with pytest.raises(QueryRuntimeException) as exception: + instance.query( + """ + CREATE TABLE test.kafka (a Int, b String MATERIALIZED toString(a)) + ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') + """ + ) + assert_returned_exception(exception) + + def test_kafka_settings_old_syntax(kafka_cluster): assert TSV( instance.query( From 5403360924ef984e8e8d7cb517f71ab2e6ea0f5a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Mar 2023 16:25:42 +0100 Subject: [PATCH 204/333] Add real client (initiator server) address into the logs for interserver mode It is useful to understand what is going on, in some obscure cases, for instance if someone will copy configuration from the production to some docker env, and then you will see docker's private network addresses in the logs. Signed-off-by: Azat Khuzhin --- src/Server/TCPHandler.cpp | 29 +++++++++++++++++++---------- src/Server/TCPHandler.h | 2 ++ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a307b472a64..f2b4284613a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1208,14 +1208,7 @@ void TCPHandler::receiveHello() session = makeSession(); auto & client_info = session->getClientInfo(); - - /// Extract the last entry from comma separated list of forwarded_for addresses. - /// Only the last proxy can be trusted (if any). - String forwarded_address = client_info.getLastForwardedFor(); - if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false)) - session->authenticate(user, password, Poco::Net::SocketAddress(forwarded_address, socket().peerAddress().port())); - else - session->authenticate(user, password, socket().peerAddress()); + session->authenticate(user, password, getClientAddress(client_info)); } void TCPHandler::receiveAddendum() @@ -1501,11 +1494,16 @@ void TCPHandler::receiveQuery() /// so we should not rely on that. However, in this particular case we got client_info from other clickhouse-server, so it's ok. 
if (client_info.initial_user.empty()) { - LOG_DEBUG(log, "User (no user, interserver mode)"); + LOG_DEBUG(log, "User (no user, interserver mode) (client: {})", getClientAddress(client_info).toString()); } else { - LOG_DEBUG(log, "User (initial, interserver mode): {}", client_info.initial_user); + LOG_DEBUG(log, "User (initial, interserver mode): {} (client: {})", client_info.initial_user, getClientAddress(client_info).toString()); + /// In case of inter-server mode authorization is done with the + /// initial address of the client, not the real address from which + /// the query was come, since the real address is the address of + /// the initiator server, while we are interested in client's + /// address. session->authenticate(AlwaysAllowCredentials{client_info.initial_user}, client_info.initial_address); } #else @@ -1991,4 +1989,15 @@ void TCPHandler::run() } } +Poco::Net::SocketAddress TCPHandler::getClientAddress(const ClientInfo & client_info) +{ + /// Extract the last entry from comma separated list of forwarded_for addresses. + /// Only the last proxy can be trusted (if any). + String forwarded_address = client_info.getLastForwardedFor(); + if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false)) + return Poco::Net::SocketAddress(forwarded_address, socket().peerAddress().port()); + else + return socket().peerAddress(); +} + } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index f06b0b060b3..e3673b213d5 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -273,6 +273,8 @@ private: /// This function is called from different threads. void updateProgress(const Progress & value); + + Poco::Net::SocketAddress getClientAddress(const ClientInfo & client_info); }; } From b5771016f6d53f460b635f10538df60d575dac23 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 3 Mar 2023 15:46:41 +0000 Subject: [PATCH 205/333] Fix 01019_alter_materialized_view_consistent --- .../01019_alter_materialized_view_consistent.sh | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh index e90085f4e8e..496215ba492 100755 --- a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh +++ b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh @@ -50,12 +50,19 @@ function insert_thread() { function alter_thread() { trap 'exit' INT - ALTER[0]="ALTER TABLE mv MODIFY QUERY SELECT v == 1 as test, v as case FROM src_a;" - ALTER[1]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" + # Generate random ALTERs, but make sure that at least one of them is for each source table. 
+ for i in {0..5}; do + ALTER[$i]="ALTER TABLE mv MODIFY QUERY SELECT v == 1 as test, v as case FROM src_a;" + done + ALTER[$RANDOM % 3]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" + ALTER[$RANDOM % 6]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" + ALTER[$RANDOM % 6]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" + i=0 while true; do - $CLICKHOUSE_CLIENT --allow_experimental_alter_materialized_view_structure=1 \ - -q "${ALTER[$RANDOM % 2]}" + $CLICKHOUSE_CLIENT --allow_experimental_alter_materialized_view_structure=1 -q "${ALTER[$i % 6]}" + ((i=i+1)) + sleep "0.0$RANDOM" is_done=$($CLICKHOUSE_CLIENT -q "SELECT countIf(case = 1) > 0 AND countIf(case = 2) > 0 FROM mv;") From a0f5756751a6cec5c03a56a5f1552113ffb57cfd Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 3 Mar 2023 16:28:41 +0000 Subject: [PATCH 206/333] Fix DelayedJoinedBlocksWorkerTransform for grace hash join --- src/Interpreters/GraceHashJoin.cpp | 5 +--- src/Interpreters/GraceHashJoin.h | 8 ++---- .../Transforms/JoiningTransform.cpp | 27 +++++++++++++++++-- .../25340_grace_hash_limit_race.reference | 1 + .../25340_grace_hash_limit_race.sql | 4 +-- 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 1b62939ac7b..7795061072c 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -410,8 +409,6 @@ void GraceHashJoin::initialize(const Block & sample_block) void GraceHashJoin::joinBlock(Block & block, std::shared_ptr & not_processed) { - std::shared_lock current_bucket_lock(current_bucket_mutex); - if (block.rows() == 0) { hash_join->joinBlock(block, not_processed); @@ -551,7 +548,7 @@ public: IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() { - std::unique_lock current_bucket_lock(current_bucket_mutex); + std::lock_guard current_bucket_lock(current_bucket_mutex); if (current_bucket == nullptr) return nullptr; diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index 0d6c4741b95..cbc0f2f3266 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -140,12 +140,8 @@ private: FileBucket * current_bucket = nullptr; - /* Function `getDelayedBlocks` should be a critical section. - * Also some `joinBlock` calls may be in progress and we need to wait for them to finish, - * because they may may use `hash_join`, but `getDelayedBlocks` switches it to another bucket. - * So, `joinBlock` acquires shared lock and getDelayedBlocks acquires exclusive lock. - */ - mutable SharedMutex current_bucket_mutex; + /// Function `getDelayedBlocks` is a critical section, we process only one bucket at a time. 
+ mutable std::mutex current_bucket_mutex; InMemoryJoinPtr hash_join; Block hash_join_sample_block; diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index c28a84e9d5d..bf80de67b42 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -318,12 +318,22 @@ DelayedJoinedBlocksWorkerTransform::DelayedJoinedBlocksWorkerTransform(Block out IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() { + auto & output = outputs.front(); + auto & input = inputs.front(); + + if (output.isFinished()) + { + input.close(); + return Status::Finished; + } + + if (!output.canPush()) + return Status::PortFull; + if (inputs.size() != 1 && outputs.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have exactly one input port"); - auto & output = outputs.front(); - auto & input = inputs.front(); if (output_chunk) { @@ -397,6 +407,9 @@ DelayedJoinedBlocksTransform::DelayedJoinedBlocksTransform(size_t num_streams, J void DelayedJoinedBlocksTransform::work() { + if (finished) + return; + delayed_blocks = join->getDelayedBlocks(); finished = finished || delayed_blocks == nullptr; } @@ -406,6 +419,14 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { for (auto & output : outputs) { + if (output.isFinished()) + { + /// If at least one output is finished, then we have read all data from buckets. + /// Some workers can still can busy with joining the last chunk of data in memory, + /// but after that they also will finish when they will try to get next chunk. + finished = true; + continue; + } if (!output.canPush()) return Status::PortFull; } @@ -414,6 +435,8 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { for (auto & output : outputs) { + if (output.isFinished()) + continue; Chunk chunk; chunk.setChunkInfo(std::make_shared()); output.push(std::move(chunk)); diff --git a/tests/queries/0_stateless/25340_grace_hash_limit_race.reference b/tests/queries/0_stateless/25340_grace_hash_limit_race.reference index e69de29bb2d..83b33d238da 100644 --- a/tests/queries/0_stateless/25340_grace_hash_limit_race.reference +++ b/tests/queries/0_stateless/25340_grace_hash_limit_race.reference @@ -0,0 +1 @@ +1000 diff --git a/tests/queries/0_stateless/25340_grace_hash_limit_race.sql b/tests/queries/0_stateless/25340_grace_hash_limit_race.sql index b12dd7bdbba..55262ab2455 100644 --- a/tests/queries/0_stateless/25340_grace_hash_limit_race.sql +++ b/tests/queries/0_stateless/25340_grace_hash_limit_race.sql @@ -6,11 +6,11 @@ INSERT INTO test_grace_hash SELECT number, number % 100 = 0 FROM numbers(100000) SET join_algorithm = 'grace_hash'; -SELECT * FROM ( +SELECT count() FROM ( SELECT f.id FROM test_grace_hash AS f LEFT JOIN test_grace_hash AS d ON f.id = d.id LIMIT 1000 -) FORMAT Null; +); DROP TABLE test_grace_hash; From 9389cc29da406533ff589fde058a2f22fc1e9aa6 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 3 Mar 2023 16:48:47 +0000 Subject: [PATCH 207/333] better mutations of sparse columns --- .../Serializations/SerializationInfo.cpp | 32 +++++++++++++++++-- .../Serializations/SerializationInfo.h | 6 +++- .../Serializations/SerializationInfoTuple.cpp | 18 ++++++++--- .../Serializations/SerializationInfoTuple.h | 6 +++- src/Storages/MergeTree/MutateTask.cpp | 16 +++------- ...02662_sparse_columns_mutations_4.reference | 2 ++ .../02662_sparse_columns_mutations_4.sql | 21 ++++++++++++ ...02662_sparse_columns_mutations_5.reference | 2 ++ 
.../02662_sparse_columns_mutations_5.sql | 21 ++++++++++++ 9 files changed, 103 insertions(+), 21 deletions(-) create mode 100644 tests/queries/0_stateless/02662_sparse_columns_mutations_4.reference create mode 100644 tests/queries/0_stateless/02662_sparse_columns_mutations_4.sql create mode 100644 tests/queries/0_stateless/02662_sparse_columns_mutations_5.reference create mode 100644 tests/queries/0_stateless/02662_sparse_columns_mutations_5.sql diff --git a/src/DataTypes/Serializations/SerializationInfo.cpp b/src/DataTypes/Serializations/SerializationInfo.cpp index af3330d867d..4e5790ad58d 100644 --- a/src/DataTypes/Serializations/SerializationInfo.cpp +++ b/src/DataTypes/Serializations/SerializationInfo.cpp @@ -97,11 +97,37 @@ MutableSerializationInfoPtr SerializationInfo::clone() const return std::make_shared(kind, settings, data); } -std::shared_ptr SerializationInfo::createWithType(const IDataType & type, const Settings & new_settings) const +/// Returns true if all rows with default values of type 'lhs' +/// are mapped to default values of type 'rhs' after conversion. +static bool preserveDefaultsAfterConversion(const IDataType & lhs, const IDataType & rhs) +{ + if (lhs.equals(rhs)) + return true; + + bool lhs_is_columned_as_numeric = isColumnedAsNumber(lhs) || isColumnedAsDecimal(lhs); + bool rhs_is_columned_as_numeric = isColumnedAsNumber(rhs) || isColumnedAsDecimal(rhs); + + if (lhs_is_columned_as_numeric && rhs_is_columned_as_numeric) + return true; + + if (isStringOrFixedString(lhs) && isStringOrFixedString(rhs)) + return true; + + return false; +} + +std::shared_ptr SerializationInfo::createWithType( + const IDataType & old_type, + const IDataType & new_type, + const Settings & new_settings) const { auto new_kind = kind; - if (new_kind == ISerialization::Kind::SPARSE && !type.supportsSparseSerialization()) - new_kind = ISerialization::Kind::DEFAULT; + if (new_kind == ISerialization::Kind::SPARSE) + { + if (!new_type.supportsSparseSerialization() + || !preserveDefaultsAfterConversion(old_type, new_type)) + new_kind = ISerialization::Kind::DEFAULT; + } return std::make_shared(new_kind, new_settings); } diff --git a/src/DataTypes/Serializations/SerializationInfo.h b/src/DataTypes/Serializations/SerializationInfo.h index 560156980db..5b802b379e1 100644 --- a/src/DataTypes/Serializations/SerializationInfo.h +++ b/src/DataTypes/Serializations/SerializationInfo.h @@ -60,7 +60,11 @@ public: virtual void replaceData(const SerializationInfo & other); virtual std::shared_ptr clone() const; - virtual std::shared_ptr createWithType(const IDataType & type, const Settings & new_settings) const; + + virtual std::shared_ptr createWithType( + const IDataType & old_type, + const IDataType & new_type, + const Settings & new_settings) const; virtual void serialializeKindBinary(WriteBuffer & out) const; virtual void deserializeFromKindsBinary(ReadBuffer & in); diff --git a/src/DataTypes/Serializations/SerializationInfoTuple.cpp b/src/DataTypes/Serializations/SerializationInfoTuple.cpp index 5724dd7a5c3..d36668f03b6 100644 --- a/src/DataTypes/Serializations/SerializationInfoTuple.cpp +++ b/src/DataTypes/Serializations/SerializationInfoTuple.cpp @@ -97,16 +97,24 @@ MutableSerializationInfoPtr SerializationInfoTuple::clone() const return std::make_shared(std::move(elems_cloned), names, settings); } -MutableSerializationInfoPtr SerializationInfoTuple::createWithType(const IDataType & type, const Settings & new_settings) const +MutableSerializationInfoPtr SerializationInfoTuple::createWithType( + const 
IDataType & old_type, + const IDataType & new_type, + const Settings & new_settings) const { - const auto & type_tuple = assert_cast(type); - const auto & tuple_elements = type_tuple.getElements(); - assert(elems.size() == tuple_elements.size()); + const auto & old_tuple = assert_cast(old_type); + const auto & new_tuple = assert_cast(new_type); + + const auto & old_elements = old_tuple.getElements(); + const auto & new_elements = new_tuple.getElements(); + + assert(elems.size() == old_elements.size()); + assert(elems.size() == new_elements.size()); MutableSerializationInfos infos; infos.reserve(elems.size()); for (size_t i = 0; i < elems.size(); ++i) - infos.push_back(elems[i]->createWithType(*tuple_elements[i], new_settings)); + infos.push_back(elems[i]->createWithType(*old_elements[i], *new_elements[i], new_settings)); return std::make_shared(std::move(infos), names, new_settings); } diff --git a/src/DataTypes/Serializations/SerializationInfoTuple.h b/src/DataTypes/Serializations/SerializationInfoTuple.h index 4bcd14d34cb..a9f3bdb6c6e 100644 --- a/src/DataTypes/Serializations/SerializationInfoTuple.h +++ b/src/DataTypes/Serializations/SerializationInfoTuple.h @@ -19,7 +19,11 @@ public: void replaceData(const SerializationInfo & other) override; MutableSerializationInfoPtr clone() const override; - MutableSerializationInfoPtr createWithType(const IDataType & type, const Settings & new_settings) const override; + + MutableSerializationInfoPtr createWithType( + const IDataType & old_type, + const IDataType & new_type, + const Settings & new_settings) const override; void serialializeKindBinary(WriteBuffer & out) const override; void deserializeFromKindsBinary(ReadBuffer & in) override; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 1239befdd67..94715822e52 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -219,18 +219,18 @@ getColumnsForNewDataPart( continue; } - auto old_type = part_columns.getPhysical(name); + auto old_type = part_columns.getPhysical(name).type; auto new_type = updated_header.getByName(new_name).type; - if (!new_type->supportsSparseSerialization()) - continue; - SerializationInfo::Settings settings { .ratio_of_defaults_for_sparse = source_part->storage.getSettings()->ratio_of_defaults_for_sparse_serialization, .choose_kind = false }; + if (!new_type->supportsSparseSerialization() || settings.isAlwaysDefault()) + continue; + auto new_info = new_type->createSerializationInfo(settings); if (!old_info->structureEquals(*new_info)) { @@ -238,13 +238,7 @@ getColumnsForNewDataPart( continue; } - if (!old_info->hasCustomSerialization()) - { - new_serialization_infos.emplace(new_name, old_info); - continue; - } - - new_info = old_info->createWithType(*new_type, settings); + new_info = old_info->createWithType(*old_type, *new_type, settings); new_serialization_infos.emplace(new_name, std::move(new_info)); } diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_4.reference b/tests/queries/0_stateless/02662_sparse_columns_mutations_4.reference new file mode 100644 index 00000000000..2e24ab44f9a --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_4.reference @@ -0,0 +1,2 @@ +UInt64 Sparse +String Default diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_4.sql b/tests/queries/0_stateless/02662_sparse_columns_mutations_4.sql new file mode 100644 index 00000000000..039af658489 --- /dev/null +++ 
b/tests/queries/0_stateless/02662_sparse_columns_mutations_4.sql @@ -0,0 +1,21 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_sparse_mutations_4; + +CREATE TABLE t_sparse_mutations_4 (k UInt64, v UInt64) +ENGINE = MergeTree ORDER BY k +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_mutations_4 SELECT number, 0 FROM numbers(10000); + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_4' AND column = 'v' AND active +ORDER BY name; + +ALTER TABLE t_sparse_mutations_4 MODIFY COLUMN v String; + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_4' AND column = 'v' AND active +ORDER BY name; + +DROP TABLE t_sparse_mutations_4; diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_5.reference b/tests/queries/0_stateless/02662_sparse_columns_mutations_5.reference new file mode 100644 index 00000000000..698d61cbb24 --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_5.reference @@ -0,0 +1,2 @@ +Tuple(UInt64, UInt64) Default ['1','2'] ['UInt64','UInt64'] ['Sparse','Sparse'] +Tuple(UInt64, String) Default ['1','2'] ['UInt64','String'] ['Sparse','Default'] diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_5.sql b/tests/queries/0_stateless/02662_sparse_columns_mutations_5.sql new file mode 100644 index 00000000000..79bac836bdc --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_5.sql @@ -0,0 +1,21 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_sparse_mutations_5; + +CREATE TABLE t_sparse_mutations_5 (k UInt64, t Tuple(UInt64, UInt64)) +ENGINE = MergeTree ORDER BY k +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_mutations_5 SELECT number, (0, 0) FROM numbers(10000); + +SELECT type, serialization_kind, subcolumns.names, subcolumns.types, subcolumns.serializations FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_5' AND column = 't' AND active +ORDER BY name; + +ALTER TABLE t_sparse_mutations_5 MODIFY COLUMN t Tuple(UInt64, String); + +SELECT type, serialization_kind, subcolumns.names, subcolumns.types, subcolumns.serializations FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_5' AND column = 't' AND active +ORDER BY name; + +DROP TABLE t_sparse_mutations_5; From bbbe529e249adb3847f45571b83086fc805c6071 Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Fri, 3 Mar 2023 09:29:35 -0800 Subject: [PATCH 208/333] Fix farmhash for s390x --- contrib/libfarmhash/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contrib/libfarmhash/CMakeLists.txt b/contrib/libfarmhash/CMakeLists.txt index a0533a93f17..436bc3d0108 100644 --- a/contrib/libfarmhash/CMakeLists.txt +++ b/contrib/libfarmhash/CMakeLists.txt @@ -6,6 +6,10 @@ if (MSVC) target_compile_definitions (_farmhash PRIVATE FARMHASH_NO_BUILTIN_EXPECT=1) endif () +if (ARCH_S390X) + add_compile_definitions(WORDS_BIGENDIAN) +endif () + target_include_directories (_farmhash BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) add_library(ch_contrib::farmhash ALIAS _farmhash) From 6c929737eb4d5c62377927889988d5e5dda9ba25 Mon Sep 17 00:00:00 2001 From: Dave Lahn Date: Fri, 3 Mar 2023 18:42:10 +0000 Subject: [PATCH 209/333] Add missing policy name element The S3 cache example had a list of policies, but was missing the policy name. 
This caused this configuration to fail. --- docs/en/operations/storing-data.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 203fe4e42d2..cba666f73da 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -135,11 +135,13 @@ Example of configuration for versions later or equal to 22.8:
     <policies>
-        <volumes>
-            <main>
-                <disk>cache</disk>
-            </main>
-        </volumes>
+        <s3-cache>
+            <volumes>
+                <main>
+                    <disk>cache</disk>
+                </main>
+            </volumes>
+        </s3-cache>
     </policies>
``` From 7997df5739e509132a596ec4782b39e6e389970c Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 3 Mar 2023 19:50:21 +0100 Subject: [PATCH 210/333] Update storing-data.md --- docs/en/operations/storing-data.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index cba666f73da..3f9a0f67187 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -161,11 +161,13 @@ Example of configuration for versions earlier than 22.8:
     <policies>
-        <volumes>
-            <main>
-                <disk>s3</disk>
-            </main>
-        </volumes>
+        <s3-cache>
+            <volumes>
+                <main>
+                    <disk>s3</disk>
+                </main>
+            </volumes>
+        </s3-cache>
     </policies>
``` From 5c4da5aa4a08ea2fe1cfa279fc1ec8c4e6e7f68f Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 3 Mar 2023 20:05:42 +0100 Subject: [PATCH 211/333] Use separate thread pool for IO operations for backups (#47174) --- programs/server/Server.cpp | 6 ++++++ src/Backups/BackupIO_S3.cpp | 8 ++++---- src/Core/ServerSettings.h | 4 +++- src/IO/BackupIOThreadPool.cpp | 34 ++++++++++++++++++++++++++++++++++ src/IO/BackupsIOThreadPool.h | 20 ++++++++++++++++++++ 5 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 src/IO/BackupIOThreadPool.cpp create mode 100644 src/IO/BackupsIOThreadPool.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 1486a51c710..57d361886d2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -773,6 +774,11 @@ try server_settings.max_io_thread_pool_free_size, server_settings.io_thread_pool_queue_size); + BackupsIOThreadPool::initialize( + server_settings.max_backups_io_thread_pool_size, + server_settings.max_backups_io_thread_pool_free_size, + server_settings.backups_io_thread_pool_queue_size); + /// Initialize global local cache for remote filesystem. if (config().has("local_cache_for_remote_fs")) { diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index a303a0bc2d5..2f315e8d488 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -167,7 +167,7 @@ void BackupWriterS3::copyFileNative(DiskPtr src_disk, const String & src_file_na std::string src_bucket = object_storage->getObjectsNamespace(); auto file_path = fs::path(s3_uri.key) / dest_file_name; copyS3File(client, src_bucket, objects[0].absolute_path, src_offset, src_size, s3_uri.bucket, file_path, request_settings, {}, - threadPoolCallbackRunner(IOThreadPool::get(), "BackupWriterS3")); + threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupWriterS3")); } } @@ -175,7 +175,7 @@ void BackupWriterS3::copyDataToFile( const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name) { copyDataToS3File(create_read_buffer, offset, size, client, s3_uri.bucket, fs::path(s3_uri.key) / dest_file_name, request_settings, {}, - threadPoolCallbackRunner(IOThreadPool::get(), "BackupWriterS3")); + threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupWriterS3")); } BackupWriterS3::~BackupWriterS3() = default; @@ -222,7 +222,7 @@ std::unique_ptr BackupWriterS3::writeFile(const String & file_name) request_settings, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, - threadPoolCallbackRunner(IOThreadPool::get(), "BackupWriterS3")); + threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupWriterS3")); } void BackupWriterS3::removeFile(const String & file_name) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 1e884266c17..abc5b4d14d8 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -21,6 +21,9 @@ namespace DB M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \ M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \ M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \ + M(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \ + M(UInt64, 
max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \ + M(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \ M(Int32, max_connections, 1024, "Max server connections.", 0) \ M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \ M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating asynchronous metrics.", 0) \ @@ -75,4 +78,3 @@ struct ServerSettings : public BaseSettings }; } - diff --git a/src/IO/BackupIOThreadPool.cpp b/src/IO/BackupIOThreadPool.cpp new file mode 100644 index 00000000000..067fc54b1ae --- /dev/null +++ b/src/IO/BackupIOThreadPool.cpp @@ -0,0 +1,34 @@ +#include +#include "Core/Field.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +std::unique_ptr BackupsIOThreadPool::instance; + +void BackupsIOThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) +{ + if (instance) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "The BackupsIO thread pool is initialized twice"); + } + + instance = std::make_unique(max_threads, max_free_threads, queue_size, false /*shutdown_on_exception*/); +} + +ThreadPool & BackupsIOThreadPool::get() +{ + if (!instance) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "The BackupsIO thread pool is not initialized"); + } + + return *instance; +} + +} diff --git a/src/IO/BackupsIOThreadPool.h b/src/IO/BackupsIOThreadPool.h new file mode 100644 index 00000000000..f5aae5741a8 --- /dev/null +++ b/src/IO/BackupsIOThreadPool.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace DB +{ + +/* + * ThreadPool used for the Backup IO. + */ +class BackupsIOThreadPool +{ + static std::unique_ptr instance; + +public: + static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size); + static ThreadPool & get(); +}; + +} From ea8ae193d3ae40215493e450e34dc4a188a981b4 Mon Sep 17 00:00:00 2001 From: liding1992 Date: Thu, 2 Mar 2023 17:07:10 +0800 Subject: [PATCH 212/333] Fix the problem that the 'ReplicatedMergeTree' table failed to insert two similar data when the 'part_type' is configured as 'InMemory' mode. 
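The root cause is in `ColumnString::updateHashFast`, which hashed only `size()` bytes of the character buffer (the number of rows) instead of `chars.size()` bytes, so two blocks with the same row count and the same leading bytes produced the same fast hash and were deduplicated. Below is a minimal standalone sketch of this bug class, using a toy FNV-style hash in place of the real SipHash-based code and the same two strings as the test in the diff that follows; it is illustrative only, not the actual ClickHouse implementation.

```cpp
// Hashing only `rows` bytes of the character buffer instead of the whole buffer
// makes two single-row blocks with different contents hash identically.
#include <cstdint>
#include <iostream>
#include <string>

static uint64_t toyHash(const char * data, size_t size)
{
    uint64_t h = 1469598103934665603ull; // FNV-1a offset basis, stands in for SipHash
    for (size_t i = 0; i < size; ++i)
    {
        h ^= static_cast<unsigned char>(data[i]);
        h *= 1099511628211ull;
    }
    return h;
}

int main()
{
    std::string a = "abcdefghijklmnopqrstuvwxyz";
    std::string b = "a1234567890123456789012345";
    size_t rows = 1; // one inserted row per block

    // Buggy variant: uses the number of rows as the number of bytes to hash.
    std::cout << (toyHash(a.data(), rows) == toyHash(b.data(), rows)) << '\n';         // 1: collision
    // Fixed variant: hashes the whole character buffer.
    std::cout << (toyHash(a.data(), a.size()) == toyHash(b.data(), b.size())) << '\n'; // 0
}
```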
--- src/Columns/ColumnString.h | 4 ++-- ...cated_merge_tree_insert_zookeeper_long.reference | 1 + ..._replicated_merge_tree_insert_zookeeper_long.sql | 13 +++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.reference create mode 100644 tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.sql diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index aa251b1fda0..fc2ac2d2385 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -187,8 +187,8 @@ public: void updateHashFast(SipHash & hash) const override { - hash.update(reinterpret_cast(offsets.data()), size() * sizeof(offsets[0])); - hash.update(reinterpret_cast(chars.data()), size() * sizeof(chars[0])); + hash.update(reinterpret_cast(offsets.data()), offsets.size() * sizeof(offsets[0])); + hash.update(reinterpret_cast(chars.data()), chars.size() * sizeof(chars[0])); } void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.reference b/tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.sql b/tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.sql new file mode 100644 index 00000000000..df07d2e2faf --- /dev/null +++ b/tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS inmemory_test; + +CREATE TABLE inmemory_test (d Date, id String) +ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/inmemory_test', 'r1') +PARTITION BY toYYYYMMDD(d) ORDER BY (d, id) +SETTINGS min_rows_for_compact_part = 10, index_granularity = 8192; + +INSERT INTO inmemory_test(d, id) VALUES('2023-01-01', 'abcdefghijklmnopqrstuvwxyz'); +INSERT INTO inmemory_test(d, id) VALUES('2023-01-01', 'a1234567890123456789012345'); + +SELECT COUNT(1) FROM inmemory_test; + +DROP TABLE inmemory_test; From b1e8b74b9cd81f6c8296b63515f91c893530b353 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Fri, 3 Mar 2023 15:54:41 +0100 Subject: [PATCH 213/333] Update 02675_replicated_merge_tree_insert_zookeeper_long.sql --- .../02675_replicated_merge_tree_insert_zookeeper_long.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.sql b/tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.sql index df07d2e2faf..194ea9bfcc1 100644 --- a/tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.sql +++ b/tests/queries/0_stateless/02675_replicated_merge_tree_insert_zookeeper_long.sql @@ -1,3 +1,5 @@ +-- Tags: no-s3-storage + DROP TABLE IF EXISTS inmemory_test; CREATE TABLE inmemory_test (d Date, id String) From a70789c0b3ecf5384f0cd22fb214b24c9f6d5038 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 4 Mar 2023 09:15:33 +0100 Subject: [PATCH 214/333] Whitespace --- src/Backups/BackupCoordinationRemote.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 
8c696057755..2ca06b76dc1 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -604,7 +604,7 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic &) return false; auto zk = getZooKeeper(); - std::string backup_stage_path = zookeeper_path +"/stage"; + std::string backup_stage_path = zookeeper_path + "/stage"; if (!zk->exists(root_zookeeper_path)) zk->createAncestors(root_zookeeper_path); From 694f5eb0a830bd96a8f95e51a042607e418df330 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 4 Mar 2023 12:10:27 +0300 Subject: [PATCH 215/333] Update ProcessList.cpp --- src/Interpreters/ProcessList.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 6f5553f58fd..37cb9ee599f 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -362,9 +362,11 @@ QueryStatus::QueryStatus( QueryStatus::~QueryStatus() { +#if !defined(NDEBUG) /// Check that all executors were invalidated. for (const auto & e : executors) assert(!e->executor); +#endif if (auto * memory_tracker = getMemoryTracker()) { From 849fac672ade1bfc668d785ff3875b5a9ad09156 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 4 Mar 2023 16:31:18 +0300 Subject: [PATCH 216/333] Update 02585_query_status_deadlock.sh --- tests/queries/0_stateless/02585_query_status_deadlock.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02585_query_status_deadlock.sh b/tests/queries/0_stateless/02585_query_status_deadlock.sh index 92dd05ef46c..227ecb1c1b2 100755 --- a/tests/queries/0_stateless/02585_query_status_deadlock.sh +++ b/tests/queries/0_stateless/02585_query_status_deadlock.sh @@ -10,6 +10,7 @@ $CLICKHOUSE_CLIENT --query_id="$QUERY_ID" -n -q " create temporary table tmp as select * from numbers(500000000); select * from remote('127.0.0.2', 'system.numbers_mt') where number in (select * from tmp);" &> /dev/null & +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" while true do From 11930ad68503c30f8c6a91e5894c205e364b9414 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 4 Mar 2023 17:27:47 +0000 Subject: [PATCH 217/333] Docs: Update index support of has(), hasAny(), hasAll() --- .../mergetree-family/mergetree.md | 45 ++++++++++--------- .../MergeTree/MergeTreeIndexFullText.cpp | 4 +- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7c0416d3ea4..fc8060077b0 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -450,29 +450,32 @@ INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY Conditions in the `WHERE` clause contains calls of the functions that operate with columns. If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes. -The `set` index can be used with all functions. Function subsets for other indexes are shown in the table below. +Indexes of type `set` can be utilized by all functions. 
The other index types are supported as follows: | Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | -|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------| -| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | -| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | -| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | -| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | -| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ | -| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | +|------------------------------------------------------------------------------------------------------------|-------------|--------|------------|------------|--------------| +| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ | +| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | +| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | +| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | +| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | +| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | +| 
[lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ | +| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✗ | ✗ | ✔ | +| [hasAll](/docs/en/sql-reference/functions/array-functions#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ | +| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization. diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 80c4c201c3f..fa1bd36f863 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -748,9 +748,7 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool /*attach*/) if (!data_type.isString() && !data_type.isFixedString()) throw Exception(ErrorCodes::INCORRECT_QUERY, - "Bloom filter index can be used only with `String`, `FixedString`, " - "`LowCardinality(String)`, `LowCardinality(FixedString)` column " - "or Array with `String` or `FixedString` values column."); + "Ngram and token bloom filter indexes can only be used with column types `String`, `FixedString`, `LowCardinality(String)`, `LowCardinality(FixedString)`, `Array(String)` or `Array(FixedString)`"); } if (index.type == NgramTokenExtractor::getName()) From 08ce7b031e95a6c61e965e07deb26c23174e7a95 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 4 Mar 2023 16:48:04 +0100 Subject: [PATCH 218/333] release shared ptrs after finishing a transaction --- src/Interpreters/MergeTreeTransaction.cpp | 14 ++++++++++++++ src/Interpreters/MergeTreeTransaction.h | 4 ++++ src/Interpreters/TransactionLog.cpp | 6 ++++-- src/Interpreters/TransactionLog.h | 2 +- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/MergeTreeTransaction.cpp b/src/Interpreters/MergeTreeTransaction.cpp index f16ece46530..50ecb061752 100644 --- a/src/Interpreters/MergeTreeTransaction.cpp +++ b/src/Interpreters/MergeTreeTransaction.cpp @@ -315,6 +315,15 @@ bool MergeTreeTransaction::rollback() noexcept return true; } +void MergeTreeTransaction::afterFinalize() +{ + std::lock_guard lock{mutex}; + /// Release shared pointers just in case + storages.clear(); + mutations.clear(); + finalized = true; +} + void MergeTreeTransaction::onException() { TransactionLog::instance().rollbackTransaction(shared_from_this()); @@ -331,6 +340,11 @@ String MergeTreeTransaction::dumpDescription() const } std::lock_guard lock{mutex}; + if (finalized) + { + res += ", cannot dump detailed description, transaction is finalized"; + return res; + } res += fmt::format(", affects {} tables:", storages.size()); diff --git a/src/Interpreters/MergeTreeTransaction.h b/src/Interpreters/MergeTreeTransaction.h index f2d8d29d244..e5a80e03e18 100644 --- a/src/Interpreters/MergeTreeTransaction.h +++ b/src/Interpreters/MergeTreeTransaction.h @@ -65,6 
+65,8 @@ private: scope_guard beforeCommit(); void afterCommit(CSN assigned_csn) noexcept; bool rollback() noexcept; + void afterFinalize(); + void checkIsNotCancelled() const; mutable std::mutex mutex; @@ -74,6 +76,8 @@ private: std::atomic snapshot; const std::list::iterator snapshot_in_use_it; + bool finalized TSA_GUARDED_BY(mutex) = false; + /// Lists of changes made by transaction std::unordered_set storages TSA_GUARDED_BY(mutex); DataPartsVector creating_parts TSA_GUARDED_BY(mutex); diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 827957b8749..6257e617d4a 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -350,7 +350,7 @@ void TransactionLog::tryFinalizeUnknownStateTransactions() /// CSNs must be already loaded, only need to check if the corresponding mapping exists. if (auto csn = getCSN(txn->tid)) { - finalizeCommittedTransaction(txn, csn, state_guard); + finalizeCommittedTransaction(txn.get(), csn, state_guard); } else { @@ -431,7 +431,7 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool /// The only thing we can do is to postpone its finalization. { std::lock_guard lock{running_list_mutex}; - unknown_state_list.emplace_back(txn.get(), std::move(state_guard)); + unknown_state_list.emplace_back(txn, std::move(state_guard)); } log_updated_event->set(); if (throw_on_unknown_status) @@ -487,6 +487,7 @@ CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN } } + txn->afterFinalize(); return allocated_csn; } @@ -523,6 +524,7 @@ void TransactionLog::rollbackTransaction(const MergeTreeTransactionPtr & txn) no } tryWriteEventToSystemLog(log, global_context, TransactionsInfoLogElement::ROLLBACK, txn->tid); + txn->afterFinalize(); } MergeTreeTransactionPtr TransactionLog::tryGetRunningTransaction(const TIDHash & tid) diff --git a/src/Interpreters/TransactionLog.h b/src/Interpreters/TransactionLog.h index 64d02ad3ac5..6e8777d8519 100644 --- a/src/Interpreters/TransactionLog.h +++ b/src/Interpreters/TransactionLog.h @@ -177,7 +177,7 @@ private: /// Transactions that are currently processed TransactionsList running_list TSA_GUARDED_BY(running_list_mutex); /// If we lost connection on attempt to create csn- node then we don't know transaction's state. - using UnknownStateList = std::vector>; + using UnknownStateList = std::vector>; UnknownStateList unknown_state_list TSA_GUARDED_BY(running_list_mutex); UnknownStateList unknown_state_list_loaded TSA_GUARDED_BY(running_list_mutex); /// Ordered list of snapshots that are currently used by some transactions. Needed for background cleanup. 
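For readers unfamiliar with the lifetime issue above: a finished transaction that still holds `shared_ptr`s to its storages keeps those tables alive until the transaction object itself is destroyed, which can be much later if it lingers in the transaction log's lists. A minimal standalone sketch of that effect, using made-up `Storage`/`Transaction` types rather than the ClickHouse classes:

```cpp
// Clearing the shared_ptr containers in an afterFinalize()-style hook releases
// the referenced objects promptly, instead of tying their lifetime to the
// (possibly long-lived) transaction object.
#include <iostream>
#include <memory>
#include <unordered_set>

struct Storage
{
    ~Storage() { std::cout << "Storage released\n"; }
};

struct Transaction
{
    std::unordered_set<std::shared_ptr<Storage>> storages;

    void afterFinalize()
    {
        // Release shared pointers as soon as the transaction is finished.
        storages.clear();
    }
};

int main()
{
    auto txn = std::make_shared<Transaction>();
    txn->storages.insert(std::make_shared<Storage>());

    txn->afterFinalize();              // "Storage released" is printed here...
    std::cout << "txn still alive\n";  // ...even though txn itself still exists.
}
```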
From b8d33f4d38b688ddcaffc372a50148db84eb6d25 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 5 Mar 2023 13:32:04 +0100 Subject: [PATCH 219/333] Fix test --- .../test_disk_configuration/test.py | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_disk_configuration/test.py b/tests/integration/test_disk_configuration/test.py index 96cdb0213bc..6ebe994dc68 100644 --- a/tests/integration/test_disk_configuration/test.py +++ b/tests/integration/test_disk_configuration/test.py @@ -294,6 +294,12 @@ def test_merge_tree_custom_disk_setting(start_cluster): ).strip() ) + node1.query(f"DROP TABLE {TABLE_NAME} SYNC") + node1.query(f"DROP TABLE {TABLE_NAME}_2 SYNC") + node1.query(f"DROP TABLE {TABLE_NAME}_3 SYNC") + node1.query(f"DROP TABLE {TABLE_NAME}_4 SYNC") + node2.query(f"DROP TABLE {TABLE_NAME}_4 SYNC") + def test_merge_tree_nested_custom_disk_setting(start_cluster): node = cluster.instances["node1"] @@ -307,9 +313,9 @@ def test_merge_tree_nested_custom_disk_setting(start_cluster): ) node.query( - """ - DROP TABLE IF EXISTS test; - CREATE TABLE test (a Int32) + f""" + DROP TABLE IF EXISTS {TABLE_NAME} SYNC; + CREATE TABLE {TABLE_NAME} (a Int32) ENGINE = MergeTree() order by tuple() SETTINGS disk = disk( type=cache, @@ -323,13 +329,13 @@ def test_merge_tree_nested_custom_disk_setting(start_cluster): """ ) - node.query("INSERT INTO test SELECT number FROM numbers(100)") + node.query(f"INSERT INTO {TABLE_NAME} SELECT number FROM numbers(100)") node.query("SYSTEM DROP FILESYSTEM CACHE") # Check cache is filled assert 0 == int(node.query("SELECT count() FROM system.filesystem_cache")) - assert 100 == int(node.query("SELECT count() FROM test")) - node.query("SELECT * FROM test") + assert 100 == int(node.query(f"SELECT count() FROM {TABLE_NAME}")) + node.query(f"SELECT * FROM {TABLE_NAME}") assert 0 < int(node.query("SELECT count() FROM system.filesystem_cache")) # Check s3 is filled @@ -339,12 +345,13 @@ def test_merge_tree_nested_custom_disk_setting(start_cluster): node.restart_clickhouse() - assert 100 == int(node.query("SELECT count() FROM test")) + assert 100 == int(node.query(f"SELECT count() FROM {TABLE_NAME}")) expected = """ SETTINGS disk = disk(type = cache, max_size = \\'[HIDDEN]\\', path = \\'[HIDDEN]\\', disk = disk(type = s3, endpoint = \\'[HIDDEN]\\' """ - assert expected.strip() in node.query(f"SHOW CREATE TABLE test").strip() + assert expected.strip() in node.query(f"SHOW CREATE TABLE {TABLE_NAME}").strip() + node.query(f"DROP TABLE {TABLE_NAME} SYNC") def test_merge_tree_setting_override(start_cluster): @@ -419,3 +426,4 @@ def test_merge_tree_setting_override(start_cluster): assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) > 0 ) + node.query(f"DROP TABLE {TABLE_NAME} SYNC") From 099013831a8f50fbb0f9cce97bde5db90251fceb Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Sun, 5 Mar 2023 16:15:03 +0100 Subject: [PATCH 220/333] Added batching for reads and retries for the most heavy function in backups (#47243) --- src/Backups/BackupCoordinationRemote.cpp | 150 +++++++++++++++++++++-- src/Backups/BackupCoordinationRemote.h | 18 ++- src/Backups/BackupsWorker.cpp | 27 +++- src/Backups/IBackupCoordination.h | 20 ++- src/Core/Settings.h | 4 + 5 files changed, 199 insertions(+), 20 deletions(-) diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 2ca06b76dc1..029a27d2712 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ 
b/src/Backups/BackupCoordinationRemote.cpp @@ -166,13 +166,25 @@ namespace } BackupCoordinationRemote::BackupCoordinationRemote( - const String & root_zookeeper_path_, const String & backup_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_) - : root_zookeeper_path(root_zookeeper_path_) + const BackupKeeperSettings & keeper_settings_, + const String & root_zookeeper_path_, + const String & backup_uuid_, + zkutil::GetZooKeeper get_zookeeper_, + bool is_internal_) + : keeper_settings(keeper_settings_) + , root_zookeeper_path(root_zookeeper_path_) , zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_) , backup_uuid(backup_uuid_) , get_zookeeper(get_zookeeper_) , is_internal(is_internal_) { + zookeeper_retries_info = ZooKeeperRetriesInfo( + "BackupCoordinationRemote", + &Poco::Logger::get("BackupCoordinationRemote"), + keeper_settings.keeper_max_retries, + keeper_settings.keeper_retry_initial_backoff_ms, + keeper_settings.keeper_retry_max_backoff_ms); + createRootNodes(); stage_sync.emplace( zookeeper_path + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("BackupCoordination")); @@ -486,19 +498,131 @@ void BackupCoordinationRemote::updateFileInfo(const FileInfo & file_info) std::vector BackupCoordinationRemote::getAllFileInfos() const { - auto zk = getZooKeeper(); - std::vector file_infos; - Strings escaped_names = zk->getChildren(zookeeper_path + "/file_names"); - for (const String & escaped_name : escaped_names) + /// There could be tons of files inside /file_names or /file_infos + /// Thus we use MultiRead requests for processing them + /// We also use [Zoo]Keeper retries and it should be safe, because + /// this function is called at the end after the actual copying is finished. + + auto split_vector = [](Strings && vec, size_t max_batch_size) -> std::vector { - String size_and_checksum = zk->get(zookeeper_path + "/file_names/" + escaped_name); - UInt64 size = deserializeSizeAndChecksum(size_and_checksum).first; - FileInfo file_info; - if (size) /// we don't keep FileInfos for empty files - file_info = deserializeFileInfo(zk->get(zookeeper_path + "/file_infos/" + size_and_checksum)); - file_info.file_name = unescapeForFileName(escaped_name); - file_infos.emplace_back(std::move(file_info)); + std::vector result; + size_t left_border = 0; + + auto move_to_result = [&](auto && begin, auto && end) + { + auto batch = Strings(); + batch.reserve(max_batch_size); + std::move(begin, end, std::back_inserter(batch)); + result.push_back(std::move(batch)); + }; + + if (max_batch_size == 0) + { + move_to_result(vec.begin(), vec.end()); + return result; + } + + for (size_t pos = 0; pos < vec.size(); ++pos) + { + if (pos >= left_border + max_batch_size) + { + move_to_result(vec.begin() + left_border, vec.begin() + pos); + left_border = pos; + } + } + + if (vec.begin() + left_border != vec.end()) + move_to_result(vec.begin() + left_border, vec.end()); + + return result; + }; + + std::vector batched_escaped_names; + { + ZooKeeperRetriesControl retries_ctl("getAllFileInfos::getChildren", zookeeper_retries_info); + retries_ctl.retryLoop([&]() + { + auto zk = getZooKeeper(); + batched_escaped_names = split_vector(zk->getChildren(zookeeper_path + "/file_names"), keeper_settings.batch_size_for_keeper_multiread); + }); } + + std::vector file_infos; + file_infos.reserve(batched_escaped_names.size()); + + for (auto & batch : batched_escaped_names) + { + std::optional sizes_and_checksums; + { + Strings file_names_paths; + file_names_paths.reserve(batch.size()); + for (const 
String & escaped_name : batch) + file_names_paths.emplace_back(zookeeper_path + "/file_names/" + escaped_name); + + + ZooKeeperRetriesControl retries_ctl("getAllFileInfos::getSizesAndChecksums", zookeeper_retries_info); + retries_ctl.retryLoop([&]() + { + auto zk = getZooKeeper(); + sizes_and_checksums = zk->get(file_names_paths); + }); + } + + Strings non_empty_file_names; + Strings non_empty_file_infos_paths; + std::vector non_empty_files_infos; + + /// Process all files and understand whether there are some empty files + /// Save non empty file names for further batch processing + { + std::vector empty_files_infos; + for (size_t i = 0; i < batch.size(); ++i) + { + auto file_name = batch[i]; + if (sizes_and_checksums.value()[i].error != Coordination::Error::ZOK) + throw zkutil::KeeperException(sizes_and_checksums.value()[i].error); + auto size_and_checksum = sizes_and_checksums.value()[i].data; + auto size = deserializeSizeAndChecksum(size_and_checksum).first; + + if (size) + { + /// Save it later for batch processing + non_empty_file_names.emplace_back(file_name); + non_empty_file_infos_paths.emplace_back(zookeeper_path + "/file_infos/" + size_and_checksum); + continue; + } + + /// File is empty + FileInfo empty_file_info; + empty_file_info.file_name = unescapeForFileName(file_name); + empty_files_infos.emplace_back(std::move(empty_file_info)); + } + + std::move(empty_files_infos.begin(), empty_files_infos.end(), std::back_inserter(file_infos)); + } + + std::optional non_empty_file_infos_serialized; + ZooKeeperRetriesControl retries_ctl("getAllFileInfos::getFileInfos", zookeeper_retries_info); + retries_ctl.retryLoop([&]() + { + auto zk = getZooKeeper(); + non_empty_file_infos_serialized = zk->get(non_empty_file_infos_paths); + }); + + /// Process non empty files + for (size_t i = 0; i < non_empty_file_names.size(); ++i) + { + FileInfo file_info; + if (non_empty_file_infos_serialized.value()[i].error != Coordination::Error::ZOK) + throw zkutil::KeeperException(non_empty_file_infos_serialized.value()[i].error); + file_info = deserializeFileInfo(non_empty_file_infos_serialized.value()[i].data); + file_info.file_name = unescapeForFileName(non_empty_file_names[i]); + non_empty_files_infos.emplace_back(std::move(file_info)); + } + + std::move(non_empty_files_infos.begin(), non_empty_files_infos.end(), std::back_inserter(file_infos)); + } + return file_infos; } diff --git a/src/Backups/BackupCoordinationRemote.h b/src/Backups/BackupCoordinationRemote.h index c7260bcd237..23c76f5be47 100644 --- a/src/Backups/BackupCoordinationRemote.h +++ b/src/Backups/BackupCoordinationRemote.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -16,7 +17,20 @@ constexpr size_t MAX_ZOOKEEPER_ATTEMPTS = 10; class BackupCoordinationRemote : public IBackupCoordination { public: - BackupCoordinationRemote(const String & root_zookeeper_path_, const String & backup_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_); + struct BackupKeeperSettings + { + UInt64 keeper_max_retries; + UInt64 keeper_retry_initial_backoff_ms; + UInt64 keeper_retry_max_backoff_ms; + UInt64 batch_size_for_keeper_multiread; + }; + + BackupCoordinationRemote( + const BackupKeeperSettings & keeper_settings_, + const String & root_zookeeper_path_, + const String & backup_uuid_, + zkutil::GetZooKeeper get_zookeeper_, + bool is_internal_); ~BackupCoordinationRemote() override; void setStage(const String & current_host, const String & new_stage, const String & message) override; @@ -68,12 +82,14 @@ private: void 
prepareReplicatedTables() const; void prepareReplicatedAccess() const; + const BackupKeeperSettings keeper_settings; const String root_zookeeper_path; const String zookeeper_path; const String backup_uuid; const zkutil::GetZooKeeper get_zookeeper; const bool is_internal; + mutable ZooKeeperRetriesInfo zookeeper_retries_info; std::optional stage_sync; mutable std::mutex mutex; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 865151cc9ec..bdcff249e7d 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -38,12 +38,14 @@ namespace Stage = BackupCoordinationStage; namespace { - std::shared_ptr makeBackupCoordination(const String & root_zk_path, const String & backup_uuid, const ContextPtr & context, bool is_internal_backup) + std::shared_ptr makeBackupCoordination(std::optional keeper_settings, String & root_zk_path, const String & backup_uuid, const ContextPtr & context, bool is_internal_backup) { if (!root_zk_path.empty()) { + if (!keeper_settings.has_value()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Parameter keeper_settings is empty while root_zk_path is not. This is bug"); auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); }; - return std::make_shared(root_zk_path, backup_uuid, get_zookeeper, is_internal_backup); + return std::make_shared(*keeper_settings, root_zk_path, backup_uuid, get_zookeeper, is_internal_backup); } else { @@ -169,7 +171,15 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context /// if it's not created here. However to handle errors better it's better to make a coordination here because this way /// if an exception will be thrown in startMakingBackup() other hosts will know about that. 
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups"); - backup_coordination = makeBackupCoordination(root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal); + + BackupCoordinationRemote::BackupKeeperSettings keeper_settings + { + .keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries, + .keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms, + .keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms, + .batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread, + }; + backup_coordination = makeBackupCoordination(keeper_settings, root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal); } auto backup_info = BackupInfo::fromAST(*backup_query->backup_name); @@ -265,10 +275,17 @@ void BackupsWorker::doBackup( context->checkAccess(required_access); String root_zk_path; - + std::optional keeper_settings; ClusterPtr cluster; if (on_cluster) { + keeper_settings = BackupCoordinationRemote::BackupKeeperSettings + { + .keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries, + .keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms, + .keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms, + .batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread, + }; root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups"); backup_query->cluster = context->getMacros()->expand(backup_query->cluster); cluster = context->getCluster(backup_query->cluster); @@ -277,7 +294,7 @@ void BackupsWorker::doBackup( /// Make a backup coordination. if (!backup_coordination) - backup_coordination = makeBackupCoordination(root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal); + backup_coordination = makeBackupCoordination(keeper_settings, root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal); if (!allow_concurrent_backups && backup_coordination->hasConcurrentBackups(std::ref(num_active_backups))) throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent backups not supported, turn on setting 'allow_concurrent_backups'"); diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index f5fa01a1530..b75d856b50f 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -1,7 +1,9 @@ #pragma once -#include #include +#include +#include +#include namespace DB @@ -85,6 +87,22 @@ public: /// Position in the archive. UInt64 pos_in_archive = static_cast(-1); + + /// Note: this format doesn't allow to parse data back + /// It is useful only for debugging purposes + [[ maybe_unused ]] String describe() + { + String result; + result += fmt::format("file_name: {};\n", file_name); + result += fmt::format("size: {};\n", size); + result += fmt::format("checksum: {};\n", getHexUIntLowercase(checksum)); + result += fmt::format("base_size: {};\n", base_size); + result += fmt::format("base_checksum: {};\n", getHexUIntLowercase(checksum)); + result += fmt::format("data_file_name: {};\n", data_file_name); + result += fmt::format("archive_suffix: {};\n", archive_suffix); + result += fmt::format("pos_in_archive: {};\n", pos_in_archive); + return result; + } }; /// Adds file information. 
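The batching logic in `getAllFileInfos` above boils down to splitting the list of child node names into chunks and issuing one multi-read request per chunk instead of one request per node. A simplified standalone sketch of that splitting step, with a hypothetical `splitIntoBatches` helper standing in for the inline lambda in the patch:

```cpp
// Split a list of node names into batches of at most max_batch_size entries;
// each batch corresponds to a single multi-read round trip to [Zoo]Keeper.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

using Strings = std::vector<std::string>;

static std::vector<Strings> splitIntoBatches(Strings names, size_t max_batch_size)
{
    std::vector<Strings> batches;
    if (max_batch_size == 0)
    {
        batches.push_back(std::move(names)); // batching disabled: one request for everything
        return batches;
    }
    for (size_t pos = 0; pos < names.size(); pos += max_batch_size)
    {
        size_t end = std::min(pos + max_batch_size, names.size());
        batches.emplace_back(names.begin() + pos, names.begin() + end);
    }
    return batches;
}

int main()
{
    Strings names;
    for (size_t i = 0; i < 25; ++i)
        names.push_back("file_" + std::to_string(i));

    for (const auto & batch : splitIntoBatches(names, 10))
        std::cout << "multi-read of " << batch.size() << " paths\n"; // 10, 10, 5
}
```

With the default `backup_batch_size_for_keeper_multiread` of 10000 introduced further down in Settings.h, a backup containing a million files needs on the order of a hundred batched reads per pass rather than a million individual ones.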
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a65f2ccb60f..aeb6b5b1130 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -414,6 +414,10 @@ class IColumn; \ M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \ M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \ + M(UInt64, backup_keeper_max_retries, 20, "Max retries for keeper operations during backup", 0) \ + M(UInt64, backup_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for [Zoo]Keeper operations during backup", 0) \ + M(UInt64, backup_keeper_retry_max_backoff_ms, 5000, "Max backoff timeout for [Zoo]Keeper operations during backup", 0) \ + M(UInt64, backup_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup", 0) \ \ M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \ M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \ From c38d820d0f9b71156a01a1a20d16b8260c000c03 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Sun, 5 Mar 2023 16:45:17 +0000 Subject: [PATCH 221/333] Add 4LW for cleaning resource --- programs/keeper/CMakeLists.txt | 1 + src/Coordination/CoordinationSettings.cpp | 2 +- src/Coordination/FourLetterCommand.cpp | 9 ++++++++ src/Coordination/FourLetterCommand.h | 13 ++++++++++- src/Coordination/KeeperDispatcher.cpp | 27 ++++++++++++++++++++++- src/Coordination/KeeperDispatcher.h | 2 ++ 6 files changed, 51 insertions(+), 3 deletions(-) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 9b01e6920a4..761335fb707 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -128,6 +128,7 @@ if (BUILD_STANDALONE_KEEPER) ch_contrib::lz4 ch_contrib::zstd ch_contrib::cityhash + ch_contrib::jemalloc common ch_contrib::double_conversion ch_contrib::dragonbox_to_chars pcg_random diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index f5c79d3be7a..5e1ac1e2d7f 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -36,7 +36,7 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco } -const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc"; +const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs"; KeeperConfigurationAndSettings::KeeperConfigurationAndSettings() : server_id(NOT_EXIST) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 6157daad1cd..8a7fdb82fb7 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -148,6 +148,9 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat FourLetterCommandPtr recalculate_command = std::make_shared(keeper_dispatcher); factory.registerCommand(recalculate_command); + FourLetterCommandPtr clean_resources_command = std::make_shared(keeper_dispatcher); + factory.registerCommand(clean_resources_command); + factory.initializeAllowList(keeper_dispatcher); factory.setInitialize(true); } @@ -524,4 +527,10 @@ String RecalculateCommand::run() return "ok"; } +String 
CleanResourcesCommand::run() +{ + keeper_dispatcher.cleanResources(); + return "ok"; +} + } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index e1fe0333081..c1a91303c05 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -377,7 +377,6 @@ struct RequestLeaderCommand : public IFourLetterCommand ~RequestLeaderCommand() override = default; }; -/// Request to be leader. struct RecalculateCommand : public IFourLetterCommand { explicit RecalculateCommand(KeeperDispatcher & keeper_dispatcher_) @@ -390,4 +389,16 @@ struct RecalculateCommand : public IFourLetterCommand ~RecalculateCommand() override = default; }; +struct CleanResourcesCommand : public IFourLetterCommand +{ + explicit CleanResourcesCommand(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "clrs"; } + String run() override; + ~CleanResourcesCommand() override = default; +}; + } diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 06c693e45be..a6d16334924 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -9,7 +9,7 @@ #include #include #include - +#include #include #include @@ -17,12 +17,26 @@ #include #include +#if USE_JEMALLOC +# include + +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + +#endif + namespace CurrentMetrics { extern const Metric KeeperAliveConnections; extern const Metric KeeperOutstandingRequets; } +namespace ProfileEvents +{ + extern const Event MemoryAllocatorPurge; + extern const Event MemoryAllocatorPurgeTimeMicroseconds; +} + namespace fs = std::filesystem; namespace DB @@ -753,4 +767,15 @@ Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const return result; } +void KeeperDispatcher::cleanResources() +{ +#if USE_JEMALLOC + LOG_TRACE(&Poco::Logger::get("KeeperDispatcher"), "Purging unused memory"); + Stopwatch watch; + mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, watch.elapsedMicroseconds()); +#endif +} + } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 90965d0934e..9371d2fbbac 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -230,6 +230,8 @@ public: { return server->recalculateStorageStats(); } + + static void cleanResources(); }; } From 3a184880c6488a5027416315ec97ee06a93f1917 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Sun, 5 Mar 2023 17:18:53 +0000 Subject: [PATCH 222/333] Add test for clrs --- .../test_keeper_four_word_command/test.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 04f6800b92b..d3fcfcc3014 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -679,3 +679,41 @@ def test_cmd_rqld(started_cluster): + " does not become leader after 30s, maybe there is something wrong." 
) assert keeper_utils.is_leader(cluster, node) + + +def test_cmd_clrs(started_cluster): + def get_memory_purges(): + return node1.query( + "SELECT value FROM system.events WHERE event = 'MemoryAllocatorPurge' SETTINGS system_events_show_zero_values = 1" + ) + + zk = None + try: + wait_nodes() + + zk = get_fake_zk(node1.name, timeout=30.0) + + paths = [f"/clrs_{i}" for i in range(10000)] + + # we only count the events because we cannot reliably test memory usage of Keeper + # but let's create and delete nodes so the first purge needs to release some memory + create_transaction = zk.transaction() + for path in paths: + create_transaction.create(path) + create_transaction.commit() + + delete_transaction = zk.transaction() + for path in paths: + delete_transaction.delete(path) + delete_transaction.commit() + + # repeat multiple times to make sure MemoryAllocatorPurge isn't increased because of other reasons + for _ in range(5): + prev_purges = int(get_memory_purges()) + keeper_utils.send_4lw_cmd(cluster, node1, cmd="clrs") + current_purges = int(get_memory_purges()) + assert current_purges > prev_purges + prev_purges = current_purges + + finally: + destroy_zk_client(zk) From 4f85b733f13f0b3780705e55f4e36dc910d9021b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Mar 2023 20:19:33 +0300 Subject: [PATCH 223/333] Use string concatenation for XML serialization (#47251) --- src/Backups/BackupImpl.cpp | 66 ++++++++++++++++++++------------------ src/IO/Operators.h | 8 +++++ 2 files changed, 42 insertions(+), 32 deletions(-) diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index e4c85bec496..b5f48a1a277 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -16,11 +16,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -317,11 +317,19 @@ void BackupImpl::writeBackupMetadata() { assert(!is_internal_backup); - Poco::AutoPtr config{new Poco::Util::XMLConfiguration()}; - config->setInt("version", CURRENT_BACKUP_VERSION); - config->setBool("deduplicate_files", deduplicate_files); - config->setString("timestamp", toString(LocalDateTime{timestamp})); - config->setString("uuid", toString(*uuid)); + checkLockFile(true); + + std::unique_ptr out; + if (use_archives) + out = getArchiveWriter("")->writeFile(".backup"); + else + out = writer->writeFile(".backup"); + + *out << ""; + *out << "" << CURRENT_BACKUP_VERSION << ""; + *out << "" << deduplicate_files << ""; + *out << "" << toString(LocalDateTime{timestamp}) << ""; + *out << "" << toString(*uuid) << ""; auto all_file_infos = coordination->getAllFileInfos(); @@ -336,8 +344,8 @@ void BackupImpl::writeBackupMetadata() if (base_backup_in_use) { - config->setString("base_backup", base_backup_info->toString()); - config->setString("base_backup_uuid", toString(*base_backup_uuid)); + *out << "" << xml << base_backup_info->toString() << ""; + *out << "" << toString(*base_backup_uuid) << ""; } } @@ -346,31 +354,32 @@ void BackupImpl::writeBackupMetadata() num_entries = 0; size_of_entries = 0; - for (size_t i = 0; i != all_file_infos.size(); ++i) + *out << ""; + for (const auto & info : all_file_infos) { - const auto & info = all_file_infos[i]; - String prefix = i ? "contents.file[" + std::to_string(i) + "]." 
: "contents.file."; - config->setString(prefix + "name", info.file_name); - config->setUInt64(prefix + "size", info.size); + *out << ""; + + *out << "" << xml << info.file_name << ""; + *out << "" << info.size << ""; if (info.size) { - config->setString(prefix + "checksum", hexChecksum(info.checksum)); + *out << "" << hexChecksum(info.checksum) << ""; if (info.base_size) { - config->setBool(prefix + "use_base", true); + *out << "true"; if (info.base_size != info.size) { - config->setUInt64(prefix + "base_size", info.base_size); - config->setString(prefix + "base_checksum", hexChecksum(info.base_checksum)); + *out << "" << info.base_size << ""; + *out << "" << hexChecksum(info.base_checksum) << ""; } } if (!info.data_file_name.empty() && (info.data_file_name != info.file_name)) - config->setString(prefix + "data_file", info.data_file_name); + *out << "" << xml << info.data_file_name << ""; if (!info.archive_suffix.empty()) - config->setString(prefix + "archive_suffix", info.archive_suffix); + *out << "" << xml << info.archive_suffix << ""; if (info.pos_in_archive != static_cast(-1)) - config->setUInt64(prefix + "pos_in_archive", info.pos_in_archive); + *out << "" << info.pos_in_archive << ""; } total_size += info.size; @@ -380,23 +389,16 @@ void BackupImpl::writeBackupMetadata() ++num_entries; size_of_entries += info.size - info.base_size; } + + *out << ""; } + *out << ""; - std::ostringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - config->save(stream); - String str = stream.str(); + *out << ""; - checkLockFile(true); - - std::unique_ptr out; - if (use_archives) - out = getArchiveWriter("")->writeFile(".backup"); - else - out = writer->writeFile(".backup"); - out->write(str.data(), str.size()); out->finalize(); - uncompressed_size = size_of_entries + str.size(); + uncompressed_size = size_of_entries + out->count(); } diff --git a/src/IO/Operators.h b/src/IO/Operators.h index 06ff20c43e8..185745e8415 100644 --- a/src/IO/Operators.h +++ b/src/IO/Operators.h @@ -30,11 +30,13 @@ enum EscapeManip { escape }; /// For strings - escape special c enum QuoteManip { quote }; /// For strings, dates, datetimes - enclose in single quotes with escaping. In the rest, as usual. enum DoubleQuoteManip { double_quote }; /// For strings, dates, datetimes - enclose in double quotes with escaping. In the rest, as usual. enum BinaryManip { binary }; /// Output in binary format. +enum XMLManip { xml }; /// Output strings with XML escaping. 
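The xml manipulator declared just above, and wired further down in this hunk to writeXMLStringForTextElementOrAttributeValue for string-like types, is what lets writeBackupMetadata() stream string values into the .backup metadata with XML escaping, while non-string values keep the plain writeText() path. A minimal, self-contained sketch of this kind of escaping is shown here for illustration only; the exact character set handled by writeXMLStringForTextElementOrAttributeValue may differ, and escapeForXMLExample is a hypothetical name:

#include <string>
#include <string_view>

/// Illustrative only: escape a value for use inside an XML text element or attribute value.
inline std::string escapeForXMLExample(std::string_view value)
{
    std::string result;
    result.reserve(value.size());
    for (char c : value)
    {
        switch (c)
        {
            case '&': result += "&amp;"; break;
            case '<': result += "&lt;"; break;
            case '>': result += "&gt;"; break;
            case '"': result += "&quot;"; break;
            case '\'': result += "&apos;"; break;
            default: result += c; break;
        }
    }
    return result; /// e.g. escapeForXMLExample("a<b&c") yields "a&lt;b&amp;c"
}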
struct EscapeManipWriteBuffer : std::reference_wrapper { using std::reference_wrapper::reference_wrapper; }; struct QuoteManipWriteBuffer : std::reference_wrapper { using std::reference_wrapper::reference_wrapper; }; struct DoubleQuoteManipWriteBuffer : std::reference_wrapper { using std::reference_wrapper::reference_wrapper; }; struct BinaryManipWriteBuffer : std::reference_wrapper { using std::reference_wrapper::reference_wrapper; }; +struct XMLManipWriteBuffer : std::reference_wrapper { using std::reference_wrapper::reference_wrapper; }; struct EscapeManipReadBuffer : std::reference_wrapper { using std::reference_wrapper::reference_wrapper; }; struct QuoteManipReadBuffer : std::reference_wrapper { using std::reference_wrapper::reference_wrapper; }; @@ -48,11 +50,13 @@ inline EscapeManipWriteBuffer operator<< (WriteBuffer & buf, EscapeManip) inline QuoteManipWriteBuffer operator<< (WriteBuffer & buf, QuoteManip) { return buf; } inline DoubleQuoteManipWriteBuffer operator<< (WriteBuffer & buf, DoubleQuoteManip) { return buf; } inline BinaryManipWriteBuffer operator<< (WriteBuffer & buf, BinaryManip) { return buf; } +inline XMLManipWriteBuffer operator<< (WriteBuffer & buf, XMLManip) { return buf; } template WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const T & x) { writeText(x, buf.get()); return buf; } template WriteBuffer & operator<< (QuoteManipWriteBuffer buf, const T & x) { writeQuoted(x, buf.get()); return buf; } template WriteBuffer & operator<< (DoubleQuoteManipWriteBuffer buf, const T & x) { writeDoubleQuoted(x, buf.get()); return buf; } template WriteBuffer & operator<< (BinaryManipWriteBuffer buf, const T & x) { writeBinary(x, buf.get()); return buf; } +template WriteBuffer & operator<< (XMLManipWriteBuffer buf, const T & x) { writeText(x, buf.get()); return buf; } inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, const String & x) { writeEscapedString(x, buf); return buf; } inline WriteBuffer & operator<< (EscapeManipWriteBuffer buf, std::string_view x) { writeEscapedString(x, buf); return buf; } @@ -63,6 +67,10 @@ inline WriteBuffer & operator<< (QuoteManipWriteBuffer buf, const char * x inline WriteBuffer & operator<< (DoubleQuoteManipWriteBuffer buf, const char * x) { writeAnyQuotedString<'"'>(x, x + strlen(x), buf.get()); return buf; } inline WriteBuffer & operator<< (BinaryManipWriteBuffer buf, const char * x) { writeStringBinary(x, buf.get()); return buf; } +inline WriteBuffer & operator<< (XMLManipWriteBuffer buf, std::string_view x) { writeXMLStringForTextElementOrAttributeValue(x, buf); return buf; } +inline WriteBuffer & operator<< (XMLManipWriteBuffer buf, StringRef x) { writeXMLStringForTextElementOrAttributeValue(x.toView(), buf); return buf; } +inline WriteBuffer & operator<< (XMLManipWriteBuffer buf, const char * x) { writeXMLStringForTextElementOrAttributeValue(std::string_view(x), buf); return buf; } + /// The manipulator calls the WriteBuffer method `next` - this makes the buffer reset. For nested buffers, the reset is not recursive. 
enum FlushManip { flush }; From d4f1ac60bc2fb3a8ffb20491705c3ffa823e7b62 Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Mon, 6 Mar 2023 10:30:51 +0100 Subject: [PATCH 224/333] Update comment in alter_materialized_view_consistent --- .../0_stateless/01019_alter_materialized_view_consistent.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh index 496215ba492..3a2eac1f38f 100755 --- a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh +++ b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh @@ -54,6 +54,7 @@ function alter_thread() { for i in {0..5}; do ALTER[$i]="ALTER TABLE mv MODIFY QUERY SELECT v == 1 as test, v as case FROM src_a;" done + # Insert 3 ALTERs to src_b, one in the first half of the array and two in arbitrary positions. ALTER[$RANDOM % 3]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" ALTER[$RANDOM % 6]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" ALTER[$RANDOM % 6]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" From 48f644a95859af19efa3999369214755e3f81edb Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Mon, 6 Mar 2023 10:38:21 +0100 Subject: [PATCH 225/333] Fix style in JoiningTransform.cpp --- src/Processors/Transforms/JoiningTransform.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index bf80de67b42..120ff51cad1 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -333,8 +333,6 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() if (inputs.size() != 1 && outputs.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have exactly one input port"); - - if (output_chunk) { input.setNotNeeded(); @@ -414,7 +412,6 @@ void DelayedJoinedBlocksTransform::work() finished = finished || delayed_blocks == nullptr; } - IProcessor::Status DelayedJoinedBlocksTransform::prepare() { for (auto & output : outputs) From 56e8547f7e1573a1f60188e6cc7223327c2c3755 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 6 Mar 2023 10:47:32 +0000 Subject: [PATCH 226/333] Do not apply the optimization when plan is built only for analysis --- src/Planner/PlannerJoinTree.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 06dc5b70bc1..6f818e2c8f7 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -410,13 +410,12 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl } /// Apply trivial_count optimization if possible - bool is_trivial_count_applied = is_single_table_expression && table_node && select_query_info.has_aggregates + bool is_trivial_count_applied = !select_query_options.only_analyze && is_single_table_expression && table_node && select_query_info.has_aggregates && applyTrivialCountIfPossible(query_plan, *table_node, select_query_info.query_tree, planner_context->getQueryContext(), columns_names); if (is_trivial_count_applied) { - if (!select_query_options.only_analyze) - from_stage = QueryProcessingStage::WithMergeableState; + from_stage = QueryProcessingStage::WithMergeableState; } else { From eb2ed1b123f76be4d23e61a97e6a958cecee2e36 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 3 Mar 2023 13:40:16 +0100 Subject: [PATCH 227/333] Add support for different expected errors --- src/Client/ClientBase.cpp | 43 +++++++------- src/Client/TestHint.cpp | 45 +++++++++++---- src/Client/TestHint.h | 57 +++++++++++++++++-- .../01470_columns_transformers.sql | 8 +-- 4 files changed, 111 insertions(+), 42 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 53eb5080130..b5c662b4a80 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1834,7 +1834,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) { /// disable logs if expects errors TestHint test_hint(all_queries_text); - if (test_hint.clientError() || test_hint.serverError()) + if (!test_hint.clientErrors().empty() || !test_hint.serverErrors().empty()) processTextAsSingleQuery("SET send_logs_level = 'fatal'"); } @@ -1876,17 +1876,18 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) // the query ends because we failed to parse it, so we consume // the entire line. TestHint hint(String(this_query_begin, this_query_end - this_query_begin)); - if (hint.serverError()) + if (!hint.serverErrors().empty()) { // Syntax errors are considered as client errors - current_exception->addMessage("\nExpected server error '{}'.", hint.serverError()); + current_exception->addMessage("\nExpected server error: {}.", hint.serverErrors()); current_exception->rethrow(); } - if (hint.clientError() != current_exception->code()) + if (std::find(hint.clientErrors().begin(), hint.clientErrors().end(), current_exception->code()) + == hint.clientErrors().end()) { - if (hint.clientError()) - current_exception->addMessage("\nExpected client error: " + std::to_string(hint.clientError())); + if (!hint.clientErrors().empty()) + current_exception->addMessage("\nExpected client error: {}.", hint.clientErrors()); current_exception->rethrow(); } @@ -1935,37 +1936,41 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) bool error_matches_hint = true; if (have_error) { - if (test_hint.serverError()) + if (!test_hint.serverErrors().empty()) { if (!server_exception) { error_matches_hint = false; fmt::print(stderr, "Expected server error code '{}' but got no server error (query: {}).\n", - test_hint.serverError(), full_query); + test_hint.serverErrors(), full_query); } - else if (server_exception->code() != test_hint.serverError()) + else if ( + std::find(test_hint.serverErrors().begin(), test_hint.serverErrors().end(), server_exception->code()) + == test_hint.serverErrors().end()) { error_matches_hint = false; fmt::print(stderr, "Expected server error code: {} but got: {} (query: {}).\n", - test_hint.serverError(), server_exception->code(), full_query); + test_hint.serverErrors(), server_exception->code(), full_query); } } - if (test_hint.clientError()) + if (!test_hint.clientErrors().empty()) { if (!client_exception) { error_matches_hint = false; fmt::print(stderr, "Expected client error code '{}' but got no client error (query: {}).\n", - test_hint.clientError(), full_query); + test_hint.clientErrors(), full_query); } - else if (client_exception->code() != test_hint.clientError()) + else if ( + std::find(test_hint.clientErrors().begin(), test_hint.clientErrors().end(), client_exception->code()) + == test_hint.clientErrors().end()) { error_matches_hint = false; fmt::print(stderr, "Expected client error code '{}' but got '{}' (query: {}).\n", - test_hint.clientError(), 
client_exception->code(), full_query); + test_hint.clientErrors(), client_exception->code(), full_query); } } - if (!test_hint.clientError() && !test_hint.serverError()) + if (test_hint.clientErrors().empty() && test_hint.serverErrors().empty()) { // No error was expected but it still occurred. This is the // default case without test hint, doesn't need additional @@ -1975,19 +1980,19 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) } else { - if (test_hint.clientError()) + if (!test_hint.clientErrors().empty()) { error_matches_hint = false; fmt::print(stderr, "The query succeeded but the client error '{}' was expected (query: {}).\n", - test_hint.clientError(), full_query); + test_hint.clientErrors(), full_query); } - if (test_hint.serverError()) + if (!test_hint.serverErrors().empty()) { error_matches_hint = false; fmt::print(stderr, "The query succeeded but the server error '{}' was expected (query: {}).\n", - test_hint.serverError(), full_query); + test_hint.serverErrors(), full_query); } } diff --git a/src/Client/TestHint.cpp b/src/Client/TestHint.cpp index f6d1e5d73c3..adaae5fe5ee 100644 --- a/src/Client/TestHint.cpp +++ b/src/Client/TestHint.cpp @@ -6,25 +6,46 @@ #include #include +namespace DB::ErrorCodes +{ +extern const int CANNOT_PARSE_TEXT; +} + namespace { /// Parse error as number or as a string (name of the error code const) -int parseErrorCode(DB::ReadBufferFromString & in) +DB::TestHint::error_vector parseErrorCode(DB::ReadBufferFromString & in) { - int code = -1; - String code_name; + DB::TestHint::error_vector error_codes{}; - auto * pos = in.position(); - tryReadText(code, in); - if (pos != in.position()) + while (!in.eof()) { - return code; + int code = -1; + String code_name; + auto * pos = in.position(); + + tryReadText(code, in); + if (pos == in.position()) + { + readStringUntilWhitespace(code_name, in); + code = DB::ErrorCodes::getErrorCodeByName(code_name); + } + error_codes.push_back(code); + + if (in.eof()) + break; + skipWhitespaceIfAny(in); + if (in.eof()) + break; + char c; + in.readStrict(c); + if (c != '|') + throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Expected separator '|'. Got '{}'", c); + skipWhitespaceIfAny(in); } - /// Try parse as string - readStringUntilWhitespace(code_name, in); - return DB::ErrorCodes::getErrorCodeByName(code_name); + return error_codes; } } @@ -85,9 +106,9 @@ void TestHint::parse(const String & hint, bool is_leading_hint) if (!is_leading_hint) { if (item == "serverError") - server_error = parseErrorCode(in); + server_errors = parseErrorCode(in); else if (item == "clientError") - client_error = parseErrorCode(in); + client_errors = parseErrorCode(in); } if (item == "echo") diff --git a/src/Client/TestHint.h b/src/Client/TestHint.h index 7fa4e86c025..30b3cacd3cb 100644 --- a/src/Client/TestHint.h +++ b/src/Client/TestHint.h @@ -1,6 +1,10 @@ #pragma once #include +#include + +#include + #include @@ -12,10 +16,13 @@ namespace DB /// The following comment hints are supported: /// /// - "-- { serverError 60 }" -- in case of you are expecting server error. +/// - "-- { serverError 16 | 36 }" -- in case of you are expecting one of the 2 errors /// /// - "-- { clientError 20 }" -- in case of you are expecting client error. +/// - "-- { clientError 20 | 60 | 92 }" -- It's expected that the client will return one of the 3 errors. /// /// - "-- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name. +/// - "-- { serverError NO_SUCH_COLUMN_IN_TABLE | BAD_ARGUMENTS }" -- by error name. 
/// /// - "-- { clientError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name. /// @@ -43,29 +50,67 @@ namespace DB class TestHint { public: + using error_vector = std::vector; TestHint(const String & query_); - int serverError() const { return server_error; } - int clientError() const { return client_error; } + const auto & serverErrors() const { return server_errors; } + const auto & clientErrors() const { return client_errors; } std::optional echoQueries() const { return echo; } private: const String & query; - int server_error = 0; - int client_error = 0; + error_vector server_errors{}; + error_vector client_errors{}; std::optional echo; void parse(const String & hint, bool is_leading_hint); bool allErrorsExpected(int actual_server_error, int actual_client_error) const { - return (server_error || client_error) && (server_error == actual_server_error) && (client_error == actual_client_error); + if (actual_server_error && std::find(server_errors.begin(), server_errors.end(), actual_server_error) == server_errors.end()) + return false; + if (!actual_server_error && server_errors.size()) + return false; + + if (actual_client_error && std::find(client_errors.begin(), client_errors.end(), actual_client_error) == client_errors.end()) + return false; + if (!actual_client_error && client_errors.size()) + return false; + + return true; } bool lostExpectedError(int actual_server_error, int actual_client_error) const { - return (server_error && !actual_server_error) || (client_error && !actual_client_error); + return (server_errors.size() && !actual_server_error) || (client_errors.size() && !actual_client_error); } }; } + +template <> +struct fmt::formatter +{ + static constexpr auto parse(format_parse_context & ctx) + { + const auto * it = ctx.begin(); + const auto * end = ctx.end(); + + /// Only support {}. 
+ if (it != end && *it != '}') + throw format_error("Invalid format"); + + return it; + } + + template + auto format(const DB::TestHint::error_vector & error_vector, FormatContext & ctx) + { + if (error_vector.empty()) + return format_to(ctx.out(), "{}", 0); + else if (error_vector.size() == 1) + return format_to(ctx.out(), "{}", error_vector[0]); + else + return format_to(ctx.out(), "One of [{}]", fmt::join(error_vector, ", ")); + } +}; diff --git a/tests/queries/0_stateless/01470_columns_transformers.sql b/tests/queries/0_stateless/01470_columns_transformers.sql index 22c30ed36bf..7cff1920a4e 100644 --- a/tests/queries/0_stateless/01470_columns_transformers.sql +++ b/tests/queries/0_stateless/01470_columns_transformers.sql @@ -1,5 +1,3 @@ -SET allow_experimental_analyzer = 1; - DROP TABLE IF EXISTS columns_transformers; CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) Engine=TinyLog; @@ -19,15 +17,15 @@ SELECT a.* APPLY(toDate) EXCEPT(i, j) APPLY(any) from columns_transformers a; SELECT * EXCEPT STRICT i from columns_transformers; SELECT * EXCEPT STRICT (i, j) from columns_transformers; SELECT * EXCEPT STRICT i, j1 from columns_transformers; -- { serverError 47 } -SELECT * EXCEPT STRICT(i, j1) from columns_transformers; -- { serverError 36 } +SELECT * EXCEPT STRICT(i, j1) from columns_transformers; -- { serverError NO_SUCH_COLUMN_IN_TABLE | BAD_ARGUMENTS } SELECT * REPLACE STRICT i + 1 AS i from columns_transformers; -SELECT * REPLACE STRICT(i + 1 AS col) from columns_transformers; -- { serverError 36 } +SELECT * REPLACE STRICT(i + 1 AS col) from columns_transformers; -- { serverError NO_SUCH_COLUMN_IN_TABLE | BAD_ARGUMENTS } SELECT * REPLACE(i + 1 AS i) APPLY(sum) from columns_transformers; SELECT columns_transformers.* REPLACE(j + 2 AS j, i + 1 AS i) APPLY(avg) from columns_transformers; SELECT columns_transformers.* REPLACE(j + 1 AS j, j + 2 AS j) APPLY(avg) from columns_transformers; -- { serverError 43 } -- REPLACE after APPLY will not match anything SELECT a.* APPLY(toDate) REPLACE(i + 1 AS i) APPLY(any) from columns_transformers a; -SELECT a.* APPLY(toDate) REPLACE STRICT(i + 1 AS i) APPLY(any) from columns_transformers a; -- { serverError 36 } +SELECT a.* APPLY(toDate) REPLACE STRICT(i + 1 AS i) APPLY(any) from columns_transformers a; -- { serverError NO_SUCH_COLUMN_IN_TABLE | BAD_ARGUMENTS } EXPLAIN SYNTAX SELECT * APPLY(sum) from columns_transformers; EXPLAIN SYNTAX SELECT columns_transformers.* APPLY(avg) from columns_transformers; From 6c43781d116fb48512df6a966ca5bf9b3859639b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 3 Mar 2023 15:13:07 +0100 Subject: [PATCH 228/333] Try manually fixing tests --- .../0_stateless/00002_system_numbers.sql | 2 +- .../0_stateless/00386_has_column_in_table.sql | 10 +++++----- .../0_stateless/00718_format_datetime.sql | 18 +++++++++--------- .../queries/0_stateless/00975_values_list.sql | 4 ++-- .../0_stateless/01056_create_table_as.sql | 6 +++--- ...mize_skip_unused_shards_const_expr_eval.sql | 16 ++++++++-------- ...timize_skip_unused_shards_type_mismatch.sql | 2 +- .../01225_drop_dictionary_as_table.sql | 2 +- ...01225_show_create_table_from_dictionary.sql | 2 +- .../0_stateless/01231_log_queries_min_type.sql | 4 ++-- ...t_block_size_rows_for_materialized_views.sh | 2 +- tests/queries/0_stateless/01284_port.sql.j2 | 6 +++--- ...ctive_elimination_dictGet_BAD_ARGUMENTS.sql | 2 +- ..._GROUP_BY_injective_elimination_dictGet.sql | 2 +- .../01402_cast_nullable_string_to_enum.sql | 8 ++++---- 
.../01404_roundUpToPowerOfTwoOrZero_safety.sql | 2 +- .../0_stateless/01407_lambda_arrayJoin.sql | 2 +- .../0_stateless/01408_range_overflow.sql | 2 +- ..._trivial_count_with_partition_predicate.sql | 12 ++++++------ ...timize_aggregation_in_order_memory_long.sql | 4 ++-- .../01516_create_table_primary_key.sql | 2 +- ...terministic_optimize_skip_unused_shards.sql | 2 +- .../01530_drop_database_atomic_sync.sql | 2 +- .../01555_system_distribution_queue_mask.sql | 2 +- .../0_stateless/01592_toUnixTimestamp_Date.sql | 2 +- .../queries/0_stateless/01595_countMatches.sql | 4 ++-- .../01709_inactive_parts_to_throw_insert.sql | 2 +- .../01710_projection_with_mixed_pipeline.sql | 2 +- .../0_stateless/01888_read_int_safe.sql | 16 ++++++++-------- .../02008_tuple_to_name_value_pairs.sql | 8 ++++---- 30 files changed, 75 insertions(+), 75 deletions(-) diff --git a/tests/queries/0_stateless/00002_system_numbers.sql b/tests/queries/0_stateless/00002_system_numbers.sql index 95f75573201..d5934c7d387 100644 --- a/tests/queries/0_stateless/00002_system_numbers.sql +++ b/tests/queries/0_stateless/00002_system_numbers.sql @@ -6,7 +6,7 @@ SELECT number FROM system.numbers WHERE number >= 5 LIMIT 2; SELECT * FROM system.numbers WHERE number == 7 LIMIT 1; SELECT number AS n FROM system.numbers WHERE number IN(8, 9) LIMIT 2; select number from system.numbers limit 0; -select x from system.numbers limit 1; -- { clientError 0 serverError 47 } +select x from system.numbers limit 1; -- { serverError UNKNOWN_IDENTIFIER } SELECT x, number FROM system.numbers LIMIT 1; -- { serverError 47 } SELECT * FROM system.number LIMIT 1; -- { serverError 60 } SELECT * FROM system LIMIT 1; -- { serverError 60 } diff --git a/tests/queries/0_stateless/00386_has_column_in_table.sql b/tests/queries/0_stateless/00386_has_column_in_table.sql index d543bb42ca7..7347293e05b 100644 --- a/tests/queries/0_stateless/00386_has_column_in_table.sql +++ b/tests/queries/0_stateless/00386_has_column_in_table.sql @@ -21,11 +21,11 @@ SELECT hasColumnInTable('localhost', currentDatabase(), 'has_column_in_table', ' SELECT hasColumnInTable('system', 'one', ''); /* bad queries */ -SELECT hasColumnInTable('', '', ''); -- { serverError 60; } -SELECT hasColumnInTable('', 't', 'c'); -- { serverError 81; } -SELECT hasColumnInTable(currentDatabase(), '', 'c'); -- { serverError 60; } -SELECT hasColumnInTable('d', 't', 's'); -- { serverError 81; } -SELECT hasColumnInTable(currentDatabase(), 't', 's'); -- { serverError 60; } +SELECT hasColumnInTable('', '', ''); -- { serverError 60 } +SELECT hasColumnInTable('', 't', 'c'); -- { serverError 81 } +SELECT hasColumnInTable(currentDatabase(), '', 'c'); -- { serverError 60 } +SELECT hasColumnInTable('d', 't', 's'); -- { serverError 81 } +SELECT hasColumnInTable(currentDatabase(), 't', 's'); -- { serverError 60 } DROP TABLE has_column_in_table; diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index 74ec03d83d3..3f8c927dfe7 100644 --- a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -1,14 +1,14 @@ SET send_logs_level = 'fatal'; -SELECT formatDateTime(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH (42) } -SELECT formatDateTime('not a datetime', 'IGNORED'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT (43) } -SELECT formatDateTime(now(), now()); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT (43) } -SELECT formatDateTime(now(), 'good format pattern', now()); -- { serverError 
ILLEGAL_TYPE_OF_ARGUMENT (43) } -SELECT formatDateTime(now(), 'unescaped %'); -- { serverError BAD_ARGUMENTS (36) } -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%U'); -- { serverError NOT_IMPLEMENTED (48) } -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%v'); -- { serverError NOT_IMPLEMENTED (48) } -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%x'); -- { serverError NOT_IMPLEMENTED (48) } -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%X'); -- { serverError NOT_IMPLEMENTED (48) } +SELECT formatDateTime(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT formatDateTime('not a datetime', 'IGNORED'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT formatDateTime(now(), now()); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT formatDateTime(now(), 'good format pattern', now()); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT formatDateTime(now(), 'unescaped %'); -- { serverError BAD_ARGUMENTS } +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%U'); -- { serverError NOT_IMPLEMENTED } +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%v'); -- { serverError NOT_IMPLEMENTED } +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%x'); -- { serverError NOT_IMPLEMENTED } +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%X'); -- { serverError NOT_IMPLEMENTED } SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%a'), formatDateTime(toDate32('2018-01-02'), '%a'); SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%b'), formatDateTime(toDate32('2018-01-02'), '%b'); diff --git a/tests/queries/0_stateless/00975_values_list.sql b/tests/queries/0_stateless/00975_values_list.sql index 40c86898966..35afc99e93e 100644 --- a/tests/queries/0_stateless/00975_values_list.sql +++ b/tests/queries/0_stateless/00975_values_list.sql @@ -12,8 +12,8 @@ SELECT * FROM VALUES('n UInt64, s String, ss String', (1 + 22, '23', toString(23 SELECT * FROM VALUES('a Decimal(4, 4), b String, c String', (divide(toDecimal32(5, 3), 3), 'a', 'b')); -SELECT * FROM VALUES('x Float64', toUInt64(-1)); -- { serverError 69; } -SELECT * FROM VALUES('x Float64', NULL); -- { serverError 53; } +SELECT * FROM VALUES('x Float64', toUInt64(-1)); -- { serverError 69 } +SELECT * FROM VALUES('x Float64', NULL); -- { serverError 53 } SELECT * FROM VALUES('x Nullable(Float64)', NULL); DROP TABLE values_list; diff --git a/tests/queries/0_stateless/01056_create_table_as.sql b/tests/queries/0_stateless/01056_create_table_as.sql index 62db8282ac0..2e146d67ca9 100644 --- a/tests/queries/0_stateless/01056_create_table_as.sql +++ b/tests/queries/0_stateless/01056_create_table_as.sql @@ -19,12 +19,12 @@ DROP TABLE t3; -- live view SET allow_experimental_live_view=1; CREATE LIVE VIEW lv AS SELECT * FROM t1; -CREATE TABLE t3 AS lv; -- { serverError 80; } +CREATE TABLE t3 AS lv; -- { serverError 80 } DROP TABLE lv; -- view CREATE VIEW v AS SELECT * FROM t1; -CREATE TABLE t3 AS v; -- { serverError 80; } +CREATE TABLE t3 AS v; -- { serverError 80 } DROP TABLE v; -- dictionary @@ -43,7 +43,7 @@ SOURCE(CLICKHOUSE( TABLE 'dict_data' DB 'test_01056_dict_data' USER 'default' PASSWORD '')) LIFETIME(MIN 0 MAX 0) LAYOUT(SPARSE_HASHED()); -CREATE TABLE t3 AS dict; -- { serverError 80; } +CREATE TABLE t3 AS dict; -- { serverError 80 } DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t3; diff --git a/tests/queries/0_stateless/01072_optimize_skip_unused_shards_const_expr_eval.sql b/tests/queries/0_stateless/01072_optimize_skip_unused_shards_const_expr_eval.sql 
index 85c239765bc..24eaaacb8bd 100644 --- a/tests/queries/0_stateless/01072_optimize_skip_unused_shards_const_expr_eval.sql +++ b/tests/queries/0_stateless/01072_optimize_skip_unused_shards_const_expr_eval.sql @@ -16,16 +16,16 @@ select * from dist_01072 where key=toInt32OrZero(toString(xxHash64(0))); select * from dist_01072 where key=toInt32(xxHash32(0)); select * from dist_01072 where key=toInt32(toInt32(xxHash32(0))); select * from dist_01072 where key=toInt32(toInt32(toInt32(xxHash32(0)))); -select * from dist_01072 where key=value; -- { serverError 507; } -select * from dist_01072 where key=toInt32(value); -- { serverError 507; } +select * from dist_01072 where key=value; -- { serverError 507 } +select * from dist_01072 where key=toInt32(value); -- { serverError 507 } select * from dist_01072 where key=value settings force_optimize_skip_unused_shards=0; select * from dist_01072 where key=toInt32(value) settings force_optimize_skip_unused_shards=0; drop table dist_01072; create table dist_01072 (key Int, value Nullable(Int), str String) Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01072, key%2); select * from dist_01072 where key=toInt32(xxHash32(0)); -select * from dist_01072 where key=value; -- { serverError 507; } -select * from dist_01072 where key=toInt32(value); -- { serverError 507; } +select * from dist_01072 where key=value; -- { serverError 507 } +select * from dist_01072 where key=toInt32(value); -- { serverError 507 } select * from dist_01072 where key=value settings force_optimize_skip_unused_shards=0; select * from dist_01072 where key=toInt32(value) settings force_optimize_skip_unused_shards=0; @@ -34,16 +34,16 @@ set allow_suspicious_low_cardinality_types=1; drop table dist_01072; create table dist_01072 (key Int, value LowCardinality(Int), str String) Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01072, key%2); select * from dist_01072 where key=toInt32(xxHash32(0)); -select * from dist_01072 where key=value; -- { serverError 507; } -select * from dist_01072 where key=toInt32(value); -- { serverError 507; } +select * from dist_01072 where key=value; -- { serverError 507 } +select * from dist_01072 where key=toInt32(value); -- { serverError 507 } select * from dist_01072 where key=value settings force_optimize_skip_unused_shards=0; select * from dist_01072 where key=toInt32(value) settings force_optimize_skip_unused_shards=0; drop table dist_01072; create table dist_01072 (key Int, value LowCardinality(Nullable(Int)), str String) Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01072, key%2); select * from dist_01072 where key=toInt32(xxHash32(0)); -select * from dist_01072 where key=value; -- { serverError 507; } -select * from dist_01072 where key=toInt32(value); -- { serverError 507; } +select * from dist_01072 where key=value; -- { serverError 507 } +select * from dist_01072 where key=toInt32(value); -- { serverError 507 } select * from dist_01072 where key=value settings force_optimize_skip_unused_shards=0; select * from dist_01072 where key=toInt32(value) settings force_optimize_skip_unused_shards=0; diff --git a/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql b/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql index 65adaf3ad71..de41132df62 100644 --- a/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql +++ b/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql @@ -9,7 +9,7 @@ create table 
data_02000 (key Int) Engine=Null(); create table dist_02000 as data_02000 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_02000, key); select * from data_02000 where key = 0xdeadbeafdeadbeaf; -select * from dist_02000 where key = 0xdeadbeafdeadbeaf settings force_optimize_skip_unused_shards=2; -- { serverError 507; } +select * from dist_02000 where key = 0xdeadbeafdeadbeaf settings force_optimize_skip_unused_shards=2; -- { serverError 507 } select * from dist_02000 where key = 0xdeadbeafdeadbeaf; drop table data_02000; diff --git a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql index 513ecbd4ed4..be2f7b2a9bf 100644 --- a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql +++ b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql @@ -16,7 +16,7 @@ LAYOUT(FLAT()); SYSTEM RELOAD DICTIONARY dict_db_01225.dict; -DROP TABLE dict_db_01225.dict; -- { serverError 520; } +DROP TABLE dict_db_01225.dict; -- { serverError 520 } DROP DICTIONARY dict_db_01225.dict; DROP DATABASE dict_db_01225; diff --git a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql index 09cde642ed2..bc733a0c546 100644 --- a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql +++ b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql @@ -18,7 +18,7 @@ LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); SHOW CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.dict` FORMAT TSVRaw; -SHOW CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.no_such_dict`; -- { serverError 487; } +SHOW CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.no_such_dict`; -- { serverError 487 } DROP DATABASE dict_db_01225; DROP DATABASE dict_db_01225_dictionary; diff --git a/tests/queries/0_stateless/01231_log_queries_min_type.sql b/tests/queries/0_stateless/01231_log_queries_min_type.sql index c2470bb9a56..0ed5e3e605c 100644 --- a/tests/queries/0_stateless/01231_log_queries_min_type.sql +++ b/tests/queries/0_stateless/01231_log_queries_min_type.sql @@ -15,7 +15,7 @@ select count() from system.query_log where current_database = currentDatabase() set max_rows_to_read='100K'; set log_queries_min_type='EXCEPTION_WHILE_PROCESSING'; -select '01231_log_queries_min_type/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158; } +select '01231_log_queries_min_type/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158 } set max_rows_to_read=0; system flush logs; select count() from system.query_log where current_database = currentDatabase() @@ -23,7 +23,7 @@ select count() from system.query_log where current_database = currentDatabase() and event_date >= yesterday() and type = 'ExceptionWhileProcessing'; set max_rows_to_read='100K'; -select '01231_log_queries_min_type w/ Settings/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158; } +select '01231_log_queries_min_type w/ Settings/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158 } system flush logs; set max_rows_to_read=0; select count() from system.query_log where diff --git a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh index 0e258bbbb09..08cc97c84bf 100755 --- 
a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh +++ b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh @@ -76,7 +76,7 @@ insert into data_01278 select reinterpretAsString(number), // s6 reinterpretAsString(number), // s7 reinterpretAsString(number) // s8 -from numbers(100000); -- { serverError 241; }" > /dev/null 2>&1 +from numbers(100000); -- { serverError 241 }" > /dev/null 2>&1 local ret_code=$? if [[ $ret_code -eq 0 ]]; then diff --git a/tests/queries/0_stateless/01284_port.sql.j2 b/tests/queries/0_stateless/01284_port.sql.j2 index 6f78b3b8e3b..50e096c6deb 100644 --- a/tests/queries/0_stateless/01284_port.sql.j2 +++ b/tests/queries/0_stateless/01284_port.sql.j2 @@ -19,9 +19,9 @@ select port{{ suffix }}('http://127.0.0.1/', toUInt16(80)); select port{{ suffix }}('http://foobar.com/', toUInt16(80)); -- unsupported -/* ILLEGAL_TYPE_OF_ARGUMENT */ select port(toFixedString('', 1)); -- { serverError 43; } -/* ILLEGAL_TYPE_OF_ARGUMENT */ select port{{ suffix }}('', 1); -- { serverError 43; } -/* NUMBER_OF_ARGUMENTS_DOESNT_MATCH */ select port{{ suffix }}('', 1, 1); -- { serverError 42; } +/* ILLEGAL_TYPE_OF_ARGUMENT */ select port(toFixedString('', 1)); -- { serverError 43 } +/* ILLEGAL_TYPE_OF_ARGUMENT */ select port{{ suffix }}('', 1); -- { serverError 43 } +/* NUMBER_OF_ARGUMENTS_DOESNT_MATCH */ select port{{ suffix }}('', 1, 1); -- { serverError 42 } -- -- Known limitations of domain() (getURLHost()) diff --git a/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql b/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql index 88a2b25c2db..8ff9cd2b9f2 100644 --- a/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql +++ b/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql @@ -1 +1 @@ -SELECT dictGetString(concat('default', '.countryId'), 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36; } +SELECT dictGetString(concat('default', '.countryId'), 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36 } diff --git a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql index 258d96829a5..29ffcb46fbf 100644 --- a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql +++ b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql @@ -1,7 +1,7 @@ -- Tags: no-parallel -- https://github.com/ClickHouse/ClickHouse/issues/11469 -SELECT dictGet('default.countryId', 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36; } +SELECT dictGet('default.countryId', 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36 } -- with real dictionary diff --git a/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql b/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql index 3b53e593095..b8b5370515a 100644 --- a/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql +++ b/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql @@ -5,9 +5,9 @@ SELECT CAST(CAST(NULL AS Nullable(String)) AS Nullable(Enum8('Hello' = 1))); SELECT CAST(CAST(NULL AS Nullable(FixedString(1))) AS Nullable(Enum8('Hello' = 1))); -- empty string still not acceptable -SELECT CAST(CAST('' AS 
Nullable(String)) AS Nullable(Enum8('Hello' = 1))); -- { serverError 36; } -SELECT CAST(CAST('' AS Nullable(FixedString(1))) AS Nullable(Enum8('Hello' = 1))); -- { serverError 36; } +SELECT CAST(CAST('' AS Nullable(String)) AS Nullable(Enum8('Hello' = 1))); -- { serverError 36 } +SELECT CAST(CAST('' AS Nullable(FixedString(1))) AS Nullable(Enum8('Hello' = 1))); -- { serverError 36 } -- non-Nullable Enum() still not acceptable -SELECT CAST(CAST(NULL AS Nullable(String)) AS Enum8('Hello' = 1)); -- { serverError 349; } -SELECT CAST(CAST(NULL AS Nullable(FixedString(1))) AS Enum8('Hello' = 1)); -- { serverError 349; } +SELECT CAST(CAST(NULL AS Nullable(String)) AS Enum8('Hello' = 1)); -- { serverError 349 } +SELECT CAST(CAST(NULL AS Nullable(FixedString(1))) AS Enum8('Hello' = 1)); -- { serverError 349 } diff --git a/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql b/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql index 4ee6e1fa5e4..d61a35c9999 100644 --- a/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql +++ b/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql @@ -1,4 +1,4 @@ -- repeat() with this length and this number of rows will allocation huge enough region (MSB set), -- which will cause roundUpToPowerOfTwoOrZero() returns 0 for such allocation (before the fix), -- and later repeat() will try to use this memory and will got SIGSEGV. -SELECT repeat('0.0001048576', number * (number * (number * 255))) FROM numbers(65535); -- { serverError 131; } +SELECT repeat('0.0001048576', number * (number * (number * 255))) FROM numbers(65535); -- { serverError 131 } diff --git a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql index 363b1d92dbb..e1b8c1d5a76 100644 --- a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql +++ b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql @@ -1,5 +1,5 @@ SELECT arrayFilter((a) -> ((a, arrayJoin([])) IN (Null, [Null])), []); SELECT arrayFilter((a) -> ((a, arrayJoin([[]])) IN (Null, [Null])), []); -SELECT * FROM system.one ARRAY JOIN arrayFilter((a) -> ((a, arrayJoin([])) IN (NULL)), []) AS arr_x; -- { serverError 43; } +SELECT * FROM system.one ARRAY JOIN arrayFilter((a) -> ((a, arrayJoin([])) IN (NULL)), []) AS arr_x; -- { serverError 43 } SELECT * FROM numbers(1) LEFT ARRAY JOIN arrayFilter((x_0, x_1) -> (arrayJoin([]) IN (NULL)), [], []) AS arr_x; diff --git a/tests/queries/0_stateless/01408_range_overflow.sql b/tests/queries/0_stateless/01408_range_overflow.sql index 2107e8c3f36..d26507f8358 100644 --- a/tests/queries/0_stateless/01408_range_overflow.sql +++ b/tests/queries/0_stateless/01408_range_overflow.sql @@ -1,7 +1,7 @@ -- executeGeneric() SELECT range(1025, 1048576 + 9223372036854775807, 9223372036854775807); SELECT range(1025, 1048576 + (9223372036854775807 AS i), i); -SELECT range(1025, 18446744073709551615, 1); -- { serverError 69; } +SELECT range(1025, 18446744073709551615, 1); -- { serverError 69 } -- executeConstStep() SELECT range(number, 1048576 + 9223372036854775807, 9223372036854775807) FROM system.numbers LIMIT 1 OFFSET 1025; diff --git a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql index e4e2e3dd76a..e8643a4468c 100644 --- a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql +++ b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql @@ -7,16 +7,16 @@ 
insert into test1 values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), set max_rows_to_read = 1; -- non-optimized -select count() from test1 settings max_parallel_replicas = 3; -- { serverError 158; } +select count() from test1 settings max_parallel_replicas = 3; -- { serverError 158 } -- optimized (toYear is monotonic and we provide the partition expr as is) select count() from test1 where toYear(toDate(p)) = 1999; -- non-optimized (toDate(DateTime) is always monotonic, but we cannot relaxing the predicates to do trivial count()) -select count() from test1 where p > toDateTime('2020-09-01 10:00:00'); -- { serverError 158; } +select count() from test1 where p > toDateTime('2020-09-01 10:00:00'); -- { serverError 158 } -- optimized (partition expr wrapped with non-monotonic functions) select count() FROM test1 where toDate(p) = '2020-09-01' and sipHash64(toString(toDate(p))) % 2 = 1; select count() FROM test1 where toDate(p) = '2020-09-01' and sipHash64(toString(toDate(p))) % 2 = 0; -- non-optimized (some predicate depends on non-partition_expr columns) -select count() FROM test1 where toDate(p) = '2020-09-01' and k = 2; -- { serverError 158; } +select count() FROM test1 where toDate(p) = '2020-09-01' and k = 2; -- { serverError 158 } -- optimized select count() from test1 where toDate(p) > '2020-09-01'; -- non-optimized @@ -35,10 +35,10 @@ select count() from test_tuple where i > 2; -- optimized select count() from test_tuple where i < 1; -- non-optimized -select count() from test_tuple array join [p,p] as c where toDate(p) = '2020-09-01'; -- { serverError 158; } +select count() from test_tuple array join [p,p] as c where toDate(p) = '2020-09-01'; -- { serverError 158 } select count() from test_tuple array join [1,2] as c where toDate(p) = '2020-09-01' settings max_rows_to_read = 4; -- non-optimized -select count() from test_tuple array join [1,2,3] as c where toDate(p) = '2020-09-01'; -- { serverError 158; } +select count() from test_tuple array join [1,2,3] as c where toDate(p) = '2020-09-01'; -- { serverError 158 } select count() from test_tuple array join [1,2,3] as c where toDate(p) = '2020-09-01' settings max_rows_to_read = 6; create table test_two_args(i int, j int, k int) engine MergeTree partition by i + j order by k settings index_granularity = 1; @@ -48,7 +48,7 @@ insert into test_two_args values (1, 2, 3), (2, 1, 3), (0, 3, 4); -- optimized select count() from test_two_args where i + j = 3; -- non-optimized -select count() from test_two_args where i = 1; -- { serverError 158; } +select count() from test_two_args where i = 1; -- { serverError 158 } drop table test1; drop table test_tuple; diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql index 228e4d73167..3d57518d0f4 100644 --- a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql +++ b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql @@ -13,9 +13,9 @@ set max_memory_usage='500M'; set max_threads=1; set max_block_size=500; -select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=0; -- { serverError 241; } +select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=0; -- { serverError 241 } select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings 
optimize_aggregation_in_order=1; -- for WITH TOTALS previous groups should be kept. -select key, groupArray(repeat('a', 200)), count() from data_01513 group by key with totals format Null settings optimize_aggregation_in_order=1; -- { serverError 241; } +select key, groupArray(repeat('a', 200)), count() from data_01513 group by key with totals format Null settings optimize_aggregation_in_order=1; -- { serverError 241 } drop table data_01513; diff --git a/tests/queries/0_stateless/01516_create_table_primary_key.sql b/tests/queries/0_stateless/01516_create_table_primary_key.sql index b2b9f288eab..630c573c2cc 100644 --- a/tests/queries/0_stateless/01516_create_table_primary_key.sql +++ b/tests/queries/0_stateless/01516_create_table_primary_key.sql @@ -35,7 +35,7 @@ ATTACH TABLE primary_key_test(v1 Int32, v2 Int32) ENGINE=ReplacingMergeTree ORDE SELECT * FROM primary_key_test FINAL; DROP TABLE primary_key_test; -CREATE TABLE primary_key_test(v1 Int64, v2 Int32, v3 String, PRIMARY KEY(v1, gcd(v1, v2))) ENGINE=ReplacingMergeTree ORDER BY v1; -- { serverError 36; } +CREATE TABLE primary_key_test(v1 Int64, v2 Int32, v3 String, PRIMARY KEY(v1, gcd(v1, v2))) ENGINE=ReplacingMergeTree ORDER BY v1; -- { serverError 36 } CREATE TABLE primary_key_test(v1 Int64, v2 Int32, v3 String, PRIMARY KEY(v1, gcd(v1, v2))) ENGINE=ReplacingMergeTree ORDER BY (v1, gcd(v1, v2)); diff --git a/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql b/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql index 08fba7480d1..ac04178e585 100644 --- a/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql +++ b/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql @@ -5,7 +5,7 @@ create table dist_01528 as system.one engine=Distributed('test_cluster_two_shard set optimize_skip_unused_shards=1; set force_optimize_skip_unused_shards=1; -select * from dist_01528 where dummy = 2; -- { serverError 507; } +select * from dist_01528 where dummy = 2; -- { serverError 507 } select * from dist_01528 where dummy = 2 settings allow_nondeterministic_optimize_skip_unused_shards=1; drop table dist_01528; diff --git a/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql b/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql index 7a2e64742cf..13b4a4e331b 100644 --- a/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql +++ b/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql @@ -30,7 +30,7 @@ create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickho drop database db_01530_atomic; create database db_01530_atomic Engine=Atomic; -create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/db_01530_atomic/data', 'test') order by key; -- { serverError 253; } +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/db_01530_atomic/data', 'test') order by key; -- { serverError 253 } set database_atomic_wait_for_drop_and_detach_synchronously=1; diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql index fea75e1439f..f19c77c68a3 100644 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql @@ -17,7 +17,7 @@ create table dist_01555 (key Int) Engine=Distributed(test_cluster_with_incorrect insert 
into dist_01555 values (1)(2); -- since test_cluster_with_incorrect_pw contains incorrect password ignore error -system flush distributed dist_01555; -- { serverError 516; } +system flush distributed dist_01555; -- { serverError 516 } select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1'), extract(last_exception, 'AUTHENTICATION_FAILED'), dateDiff('s', last_exception_time, now()) < 5 from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; drop table dist_01555; diff --git a/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql b/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql index 5dc87e31f75..e8411484d71 100644 --- a/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql +++ b/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql @@ -1 +1 @@ -select toUnixTimestamp(today()); -- { serverError 44; } +select toUnixTimestamp(today()); -- { serverError 44 } diff --git a/tests/queries/0_stateless/01595_countMatches.sql b/tests/queries/0_stateless/01595_countMatches.sql index 6374fe7bc5b..0b170945d44 100644 --- a/tests/queries/0_stateless/01595_countMatches.sql +++ b/tests/queries/0_stateless/01595_countMatches.sql @@ -25,5 +25,5 @@ select countMatchesCaseInsensitive('foo.com BAR.COM baz.com bam.com', '([^. ]+)\ select countMatchesCaseInsensitive('foo.com@foo.com bar.com@foo.com BAZ.com@foo.com bam.com@foo.com', '([^. ]+)\.([^. ]+)@([^. ]+)\.([^. ]+)'); select 'errors'; -select countMatches(1, 'foo') from numbers(1); -- { serverError 43; } -select countMatches('foobarfoo', toString(number)) from numbers(1); -- { serverError 44; } +select countMatches(1, 'foo') from numbers(1); -- { serverError 43 } +select countMatches('foobarfoo', toString(number)) from numbers(1); -- { serverError 44 } diff --git a/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql b/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql index 6de0d4f4e0c..2bb92aec713 100644 --- a/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql +++ b/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql @@ -7,6 +7,6 @@ insert into data_01709 values (2); optimize table data_01709 final; -insert into data_01709 values (3); -- { serverError 252; } +insert into data_01709 values (3); -- { serverError 252 } drop table data_01709; diff --git a/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql b/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql index 734aa659146..5169c667b81 100644 --- a/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql +++ b/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql @@ -4,6 +4,6 @@ create table t (x UInt32) engine = MergeTree order by tuple() settings index_gra insert into t select number from numbers(100); alter table t add projection p (select uniqHLL12(x)); insert into t select number + 100 from numbers(100); -select uniqHLL12(x) from t settings allow_experimental_projection_optimization = 1, max_bytes_to_read=400, max_block_size=8; -- { serverError 307; } +select uniqHLL12(x) from t settings allow_experimental_projection_optimization = 1, max_bytes_to_read=400, max_block_size=8; -- { serverError 307 } drop table if exists t; diff --git a/tests/queries/0_stateless/01888_read_int_safe.sql b/tests/queries/0_stateless/01888_read_int_safe.sql index 3aea8e38ab0..197338775c4 100644 --- a/tests/queries/0_stateless/01888_read_int_safe.sql +++ b/tests/queries/0_stateless/01888_read_int_safe.sql @@ 
-1,10 +1,10 @@ -select toInt64('--1'); -- { serverError 72; } -select toInt64('+-1'); -- { serverError 72; } -select toInt64('++1'); -- { serverError 72; } -select toInt64('++'); -- { serverError 72; } -select toInt64('+'); -- { serverError 72; } -select toInt64('1+1'); -- { serverError 6; } -select toInt64('1-1'); -- { serverError 6; } -select toInt64(''); -- { serverError 32; } +select toInt64('--1'); -- { serverError 72 } +select toInt64('+-1'); -- { serverError 72 } +select toInt64('++1'); -- { serverError 72 } +select toInt64('++'); -- { serverError 72 } +select toInt64('+'); -- { serverError 72 } +select toInt64('1+1'); -- { serverError 6 } +select toInt64('1-1'); -- { serverError 6 } +select toInt64(''); -- { serverError 32 } select toInt64('1'); select toInt64('-1'); diff --git a/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql b/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql index 59987a86590..1f6026bb61e 100644 --- a/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql +++ b/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql @@ -19,7 +19,7 @@ INSERT INTO test02008 VALUES (tuple(3.3, 5.5, 6.6)); SELECT untuple(arrayJoin(tupleToNameValuePairs(col))) from test02008; DROP TABLE IF EXISTS test02008; -SELECT tupleToNameValuePairs(tuple(1, 1.3)); -- { serverError 43; } -SELECT tupleToNameValuePairs(tuple(1, [1,2])); -- { serverError 43; } -SELECT tupleToNameValuePairs(tuple(1, 'a')); -- { serverError 43; } -SELECT tupleToNameValuePairs(33); -- { serverError 43; } +SELECT tupleToNameValuePairs(tuple(1, 1.3)); -- { serverError 43 } +SELECT tupleToNameValuePairs(tuple(1, [1,2])); -- { serverError 43 } +SELECT tupleToNameValuePairs(tuple(1, 'a')); -- { serverError 43 } +SELECT tupleToNameValuePairs(33); -- { serverError 43 } From cb3dd3c2009ac2c5ab9e397dbf91675472f0611c Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 6 Mar 2023 13:06:31 +0100 Subject: [PATCH 229/333] Fix flaky test --- tests/integration/test_filesystem_layout/test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration/test_filesystem_layout/test.py b/tests/integration/test_filesystem_layout/test.py index 898bbc40eb9..2be478f95d0 100644 --- a/tests/integration/test_filesystem_layout/test.py +++ b/tests/integration/test_filesystem_layout/test.py @@ -44,8 +44,6 @@ def test_file_path_escaping(started_cluster): ] ) - -def test_file_path_escaping_atomic_db(started_cluster): node.query("CREATE DATABASE IF NOT EXISTS `test 2` ENGINE = Atomic") node.query( """ From aa776d00fec38fc80c4ed584fe46e9d0a82ac3fd Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 6 Mar 2023 13:24:36 +0100 Subject: [PATCH 230/333] Use executors snapshot --- src/Interpreters/ProcessList.cpp | 33 +++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 37cb9ee599f..bf452775d27 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -397,23 +397,26 @@ CancellationCode QueryStatus::cancelQuery(bool) is_killed.store(true); - std::unique_lock lock(executors_mutex); - for (const auto & e : executors) + std::vector executors_snapshot; + { - /// We should call cancel() with unlocked executors_mutex, because - /// cancel() can try to lock some internal mutex that is already locked by query executing - /// thread, and query executing thread can call removePipelineExecutor and lock executors_mutex, - /// which will 
lead to deadlock. - /// Note that the size and the content of executors cannot be changed while - /// executors_mutex is unlocked, because: - /// 1) We don't allow adding new executors while cancelling query in addPipelineExecutor - /// 2) We don't actually remove executor holder from executors in removePipelineExecutor, - /// just mark that executor is invalid. - /// So, it's safe to continue iteration over executors after subsequent mutex locking. - lock.unlock(); - e->cancel(); - lock.lock(); + /// Create a snapshot of executors under a mutex. + std::lock_guard lock(executors_mutex); + executors_snapshot = executors; } + + /// We should call cancel() for each executor with unlocked executors_mutex, because + /// cancel() can try to lock some internal mutex that is already locked by query executing + /// thread, and query executing thread can call removePipelineExecutor and lock executors_mutex, + /// which will lead to deadlock. + /// Note that the size and the content of executors cannot be changed while + /// executors_mutex is unlocked, because: + /// 1) We don't allow adding new executors while cancelling query in addPipelineExecutor + /// 2) We don't actually remove executor holder from executors in removePipelineExecutor, + /// just mark that executor is invalid. + /// So, it's ok to use a snapshot created above under a mutex, it won't be any differ from actual executors. + for (const auto & e : executors_shapshot) + e->cancel(); return CancellationCode::CancelSent; } From 9117c7491dde2fbe4e01a90f44cd433f5521c9cc Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 6 Mar 2023 12:55:02 +0000 Subject: [PATCH 231/333] Join threads if exception happened in constructor --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 1fbdd857379..b637bdea835 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -358,12 +358,27 @@ ZooKeeper::ZooKeeper( if (!args.auth_scheme.empty()) sendAuth(args.auth_scheme, args.identity); - send_thread = ThreadFromGlobalPool([this] { sendThread(); }); - receive_thread = ThreadFromGlobalPool([this] { receiveThread(); }); + try + { + send_thread = ThreadFromGlobalPool([this] { sendThread(); }); + receive_thread = ThreadFromGlobalPool([this] { receiveThread(); }); - initApiVersion(); + initApiVersion(); - ProfileEvents::increment(ProfileEvents::ZooKeeperInit); + ProfileEvents::increment(ProfileEvents::ZooKeeperInit); + } + catch (...) 
+ { + tryLogCurrentException(log, "Failed to connect to ZooKeeper"); + + if (send_thread.joinable()) + send_thread.join(); + + if (receive_thread.joinable()) + receive_thread.join(); + + throw; + } } From 13bda10470f9e710bf7b6a287f33b8700f34f4a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 6 Mar 2023 14:14:03 +0100 Subject: [PATCH 232/333] Reimplement hints using the parser --- src/Client/TestHint.cpp | 139 +++++++++--------- src/Client/TestHint.h | 4 +- .../01470_columns_transformers.sql | 6 +- 3 files changed, 78 insertions(+), 71 deletions(-) diff --git a/src/Client/TestHint.cpp b/src/Client/TestHint.cpp index adaae5fe5ee..c9e845f2039 100644 --- a/src/Client/TestHint.cpp +++ b/src/Client/TestHint.cpp @@ -1,55 +1,17 @@ -#include "TestHint.h" +#include +#include + +#include -#include -#include -#include -#include #include +#include +#include namespace DB::ErrorCodes { extern const int CANNOT_PARSE_TEXT; } -namespace -{ - -/// Parse error as number or as a string (name of the error code const) -DB::TestHint::error_vector parseErrorCode(DB::ReadBufferFromString & in) -{ - DB::TestHint::error_vector error_codes{}; - - while (!in.eof()) - { - int code = -1; - String code_name; - auto * pos = in.position(); - - tryReadText(code, in); - if (pos == in.position()) - { - readStringUntilWhitespace(code_name, in); - code = DB::ErrorCodes::getErrorCodeByName(code_name); - } - error_codes.push_back(code); - - if (in.eof()) - break; - skipWhitespaceIfAny(in); - if (in.eof()) - break; - char c; - in.readStrict(c); - if (c != '|') - throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Expected separator '|'. Got '{}'", c); - skipWhitespaceIfAny(in); - } - - return error_codes; -} - -} - namespace DB { @@ -81,8 +43,8 @@ TestHint::TestHint(const String & query_) size_t pos_end = comment.find('}', pos_start); if (pos_end != String::npos) { - String hint(comment.begin() + pos_start + 1, comment.begin() + pos_end); - parse(hint, is_leading_hint); + Lexer comment_lexer(comment.c_str() + pos_start + 1, comment.c_str() + pos_end, 0); + parse(comment_lexer, is_leading_hint); } } } @@ -90,33 +52,76 @@ TestHint::TestHint(const String & query_) } } -void TestHint::parse(const String & hint, bool is_leading_hint) +void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) { - ReadBufferFromString in(hint); - String item; + std::unordered_set commands{"echo", "echoOn", "echoOff"}; - while (!in.eof()) + std::unordered_set command_errors{ + "serverError", + "clientError", + }; + + for (Token token = comment_lexer.nextToken(); !token.isEnd(); token = comment_lexer.nextToken()) { - readStringUntilWhitespace(item, in); - if (in.eof()) - break; - - skipWhitespaceIfAny(in); - - if (!is_leading_hint) + String item = String(token.begin, token.end); + if (token.type == TokenType::BareWord && commands.contains(item)) { - if (item == "serverError") - server_errors = parseErrorCode(in); - else if (item == "clientError") - client_errors = parseErrorCode(in); + if (item == "echo") + echo.emplace(true); + if (item == "echoOn") + echo.emplace(true); + if (item == "echoOff") + echo.emplace(false); } + else if (!is_leading_hint && token.type == TokenType::BareWord && command_errors.contains(item)) + { + /// Everything after this must be a list of errors separated by comma + error_vector error_codes; + while (!token.isEnd()) + { + token = comment_lexer.nextToken(); + if (token.type == TokenType::Whitespace) + continue; + if (token.type == TokenType::Number) + { + int code; + auto [p, ec] = 
std::from_chars(token.begin, token.end, code); + if (p == token.begin) + throw DB::Exception( + DB::ErrorCodes::CANNOT_PARSE_TEXT, + "Could not parse integer number for errorcode: {}", + std::string_view(token.begin, token.end)); + error_codes.push_back(code); + } + else if (token.type == TokenType::BareWord) + { + int code = code = DB::ErrorCodes::getErrorCodeByName(std::string_view(token.begin, token.end)); + error_codes.push_back(code); + } + else + throw DB::Exception( + DB::ErrorCodes::CANNOT_PARSE_TEXT, + "Could not parse error code in {}: {}", + getTokenName(token.type), + std::string_view(token.begin, token.end)); + do + { + token = comment_lexer.nextToken(); + } while (!token.isEnd() && token.type == TokenType::Whitespace); - if (item == "echo") - echo.emplace(true); - if (item == "echoOn") - echo.emplace(true); - if (item == "echoOff") - echo.emplace(false); + if (!token.isEnd() && token.type != TokenType::Comma) + throw DB::Exception( + DB::ErrorCodes::CANNOT_PARSE_TEXT, + "Could not parse error code. Expected ','. Got '{}'", + std::string_view(token.begin, token.end)); + } + + if (item == "serverError") + server_errors = error_codes; + else + client_errors = error_codes; + break; + } } } diff --git a/src/Client/TestHint.h b/src/Client/TestHint.h index 30b3cacd3cb..fff82502078 100644 --- a/src/Client/TestHint.h +++ b/src/Client/TestHint.h @@ -11,6 +11,8 @@ namespace DB { +class Lexer; + /// Checks expected server and client error codes. /// /// The following comment hints are supported: @@ -63,7 +65,7 @@ private: error_vector client_errors{}; std::optional echo; - void parse(const String & hint, bool is_leading_hint); + void parse(Lexer & comment_lexer, bool is_leading_hint); bool allErrorsExpected(int actual_server_error, int actual_client_error) const { diff --git a/tests/queries/0_stateless/01470_columns_transformers.sql b/tests/queries/0_stateless/01470_columns_transformers.sql index 7cff1920a4e..8840ce3f3b5 100644 --- a/tests/queries/0_stateless/01470_columns_transformers.sql +++ b/tests/queries/0_stateless/01470_columns_transformers.sql @@ -17,15 +17,15 @@ SELECT a.* APPLY(toDate) EXCEPT(i, j) APPLY(any) from columns_transformers a; SELECT * EXCEPT STRICT i from columns_transformers; SELECT * EXCEPT STRICT (i, j) from columns_transformers; SELECT * EXCEPT STRICT i, j1 from columns_transformers; -- { serverError 47 } -SELECT * EXCEPT STRICT(i, j1) from columns_transformers; -- { serverError NO_SUCH_COLUMN_IN_TABLE | BAD_ARGUMENTS } +SELECT * EXCEPT STRICT(i, j1) from columns_transformers; -- { serverError NO_SUCH_COLUMN_IN_TABLE , BAD_ARGUMENTS } SELECT * REPLACE STRICT i + 1 AS i from columns_transformers; -SELECT * REPLACE STRICT(i + 1 AS col) from columns_transformers; -- { serverError NO_SUCH_COLUMN_IN_TABLE | BAD_ARGUMENTS } +SELECT * REPLACE STRICT(i + 1 AS col) from columns_transformers; -- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS } SELECT * REPLACE(i + 1 AS i) APPLY(sum) from columns_transformers; SELECT columns_transformers.* REPLACE(j + 2 AS j, i + 1 AS i) APPLY(avg) from columns_transformers; SELECT columns_transformers.* REPLACE(j + 1 AS j, j + 2 AS j) APPLY(avg) from columns_transformers; -- { serverError 43 } -- REPLACE after APPLY will not match anything SELECT a.* APPLY(toDate) REPLACE(i + 1 AS i) APPLY(any) from columns_transformers a; -SELECT a.* APPLY(toDate) REPLACE STRICT(i + 1 AS i) APPLY(any) from columns_transformers a; -- { serverError NO_SUCH_COLUMN_IN_TABLE | BAD_ARGUMENTS } +SELECT a.* APPLY(toDate) REPLACE STRICT(i + 1 AS i) 
APPLY(any) from columns_transformers a; -- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS } EXPLAIN SYNTAX SELECT * APPLY(sum) from columns_transformers; EXPLAIN SYNTAX SELECT columns_transformers.* APPLY(avg) from columns_transformers; From 12525f768cd2375d6605a4cf9e2ebdcdda16dfd0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 6 Mar 2023 14:18:01 +0100 Subject: [PATCH 233/333] Add default constructor for `MultiReadResponse` (#47254) * Add default constructor for MultiReadResponse * Remove optional * Fix style --- src/Backups/BackupCoordinationRemote.cpp | 18 +++++++-------- src/Common/ZooKeeper/ZooKeeper.h | 28 +++++++++++++++++++++--- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 029a27d2712..c0fb4d5e066 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -552,7 +552,7 @@ std::vector BackupCoordinationRemote::getAllFileInfos() const for (auto & batch : batched_escaped_names) { - std::optional sizes_and_checksums; + zkutil::ZooKeeper::MultiGetResponse sizes_and_checksums; { Strings file_names_paths; file_names_paths.reserve(batch.size()); @@ -561,7 +561,7 @@ std::vector BackupCoordinationRemote::getAllFileInfos() const ZooKeeperRetriesControl retries_ctl("getAllFileInfos::getSizesAndChecksums", zookeeper_retries_info); - retries_ctl.retryLoop([&]() + retries_ctl.retryLoop([&] { auto zk = getZooKeeper(); sizes_and_checksums = zk->get(file_names_paths); @@ -579,9 +579,9 @@ std::vector BackupCoordinationRemote::getAllFileInfos() const for (size_t i = 0; i < batch.size(); ++i) { auto file_name = batch[i]; - if (sizes_and_checksums.value()[i].error != Coordination::Error::ZOK) - throw zkutil::KeeperException(sizes_and_checksums.value()[i].error); - auto size_and_checksum = sizes_and_checksums.value()[i].data; + if (sizes_and_checksums[i].error != Coordination::Error::ZOK) + throw zkutil::KeeperException(sizes_and_checksums[i].error); + const auto & size_and_checksum = sizes_and_checksums[i].data; auto size = deserializeSizeAndChecksum(size_and_checksum).first; if (size) @@ -601,7 +601,7 @@ std::vector BackupCoordinationRemote::getAllFileInfos() const std::move(empty_files_infos.begin(), empty_files_infos.end(), std::back_inserter(file_infos)); } - std::optional non_empty_file_infos_serialized; + zkutil::ZooKeeper::MultiGetResponse non_empty_file_infos_serialized; ZooKeeperRetriesControl retries_ctl("getAllFileInfos::getFileInfos", zookeeper_retries_info); retries_ctl.retryLoop([&]() { @@ -613,9 +613,9 @@ std::vector BackupCoordinationRemote::getAllFileInfos() const for (size_t i = 0; i < non_empty_file_names.size(); ++i) { FileInfo file_info; - if (non_empty_file_infos_serialized.value()[i].error != Coordination::Error::ZOK) - throw zkutil::KeeperException(non_empty_file_infos_serialized.value()[i].error); - file_info = deserializeFileInfo(non_empty_file_infos_serialized.value()[i].data); + if (non_empty_file_infos_serialized[i].error != Coordination::Error::ZOK) + throw zkutil::KeeperException(non_empty_file_infos_serialized[i].error); + file_info = deserializeFileInfo(non_empty_file_infos_serialized[i].data); file_info.file_name = unescapeForFileName(non_empty_file_names[i]); non_empty_files_infos.emplace_back(std::move(file_info)); } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 9de8241cfbe..d20d036f04e 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ 
b/src/Common/ZooKeeper/ZooKeeper.h @@ -33,6 +33,12 @@ namespace CurrentMetrics namespace DB { class ZooKeeperLog; + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + } namespace zkutil @@ -79,13 +85,23 @@ concept ZooKeeperResponse = std::derived_from; template struct MultiReadResponses { + MultiReadResponses() = default; + template explicit MultiReadResponses(TResponses responses_) : responses(std::move(responses_)) {} size_t size() const { - return std::visit([](auto && resp) { return resp.size(); }, responses); + return std::visit( + [&](const TResponses & resp) -> size_t + { + if constexpr (std::same_as) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "No responses set for MultiRead"); + else + return resp.size(); + }, + responses); } ResponseType & operator[](size_t index) @@ -94,8 +110,10 @@ struct MultiReadResponses [&](TResponses & resp) -> ResponseType & { if constexpr (std::same_as) + { return dynamic_cast(*resp[index]); - else + } + else if constexpr (std::same_as) { if constexpr (try_multi) { @@ -107,6 +125,10 @@ struct MultiReadResponses } return resp[index]; } + else + { + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "No responses set for MultiRead"); + } }, responses); } @@ -137,7 +159,7 @@ private: size_t size() const { return future_responses.size(); } }; - std::variant responses; + std::variant responses; }; /// ZooKeeper session. The interface is substantially different from the usual libzookeeper API. From 7c14059f3f7dd2634e4dcd9336cb5fed9111262e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 6 Mar 2023 14:20:45 +0100 Subject: [PATCH 234/333] Update comments --- src/Client/TestHint.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Client/TestHint.h b/src/Client/TestHint.h index fff82502078..8d6b6dfc4e0 100644 --- a/src/Client/TestHint.h +++ b/src/Client/TestHint.h @@ -18,13 +18,13 @@ class Lexer; /// The following comment hints are supported: /// /// - "-- { serverError 60 }" -- in case of you are expecting server error. -/// - "-- { serverError 16 | 36 }" -- in case of you are expecting one of the 2 errors +/// - "-- { serverError 16, 36 }" -- in case of you are expecting one of the 2 errors /// /// - "-- { clientError 20 }" -- in case of you are expecting client error. -/// - "-- { clientError 20 | 60 | 92 }" -- It's expected that the client will return one of the 3 errors. +/// - "-- { clientError 20, 60, 92 }" -- It's expected that the client will return one of the 3 errors. /// /// - "-- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name. -/// - "-- { serverError NO_SUCH_COLUMN_IN_TABLE | BAD_ARGUMENTS }" -- by error name. +/// - "-- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS }" -- by error name. /// /// - "-- { clientError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name. /// From c13638904af33b71d76e4eae0b91b894588879b9 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 6 Mar 2023 14:38:54 +0100 Subject: [PATCH 235/333] Adjust the aggregation interval --- tests/ci/autoscale_runners_lambda/app.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index 596e675ee24..7e3af3f6779 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -21,6 +21,11 @@ RUNNER_TYPE_LABELS = [ "style-checker", "style-checker-aarch64", ] + +# 4 HOUR - is a balance to get the most precise values +# - Our longest possible running check is around 5h on the worst scenario +# - The long queue won't be wiped out and replaced, so the measurmenet is fine +# - If the data is spoiled by something, we are from the bills perspective QUEUE_QUERY = f"""SELECT last_status AS status, toUInt32(count()) AS length, @@ -35,7 +40,7 @@ FROM FROM default.workflow_jobs WHERE has(labels, 'self-hosted') AND hasAny({RUNNER_TYPE_LABELS}, labels) - AND started_at > now() - INTERVAL 1 DAY + AND started_at > now() - INTERVAL 4 HOUR GROUP BY ALL HAVING last_status IN ('in_progress', 'queued') ) From 43da841e1e03552e908277cdfb5465f6addbfe62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 6 Mar 2023 14:53:32 +0100 Subject: [PATCH 236/333] Style --- src/Client/TestHint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/TestHint.h b/src/Client/TestHint.h index 8d6b6dfc4e0..d9650b9d4d7 100644 --- a/src/Client/TestHint.h +++ b/src/Client/TestHint.h @@ -24,7 +24,7 @@ class Lexer; /// - "-- { clientError 20, 60, 92 }" -- It's expected that the client will return one of the 3 errors. /// /// - "-- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name. -/// - "-- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS }" -- by error name. +/// - "-- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS }" -- by error name. /// /// - "-- { clientError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name. /// From e4751f95e1860b52652f2c16d930ab6280d346c6 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 6 Mar 2023 14:56:05 +0100 Subject: [PATCH 237/333] Fix typo --- src/Interpreters/ProcessList.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index bf452775d27..a26844ae73c 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -415,7 +415,7 @@ CancellationCode QueryStatus::cancelQuery(bool) /// 2) We don't actually remove executor holder from executors in removePipelineExecutor, /// just mark that executor is invalid. /// So, it's ok to use a snapshot created above under a mutex, it won't be any differ from actual executors. 
- for (const auto & e : executors_shapshot) + for (const auto & e : executors_snapshot) e->cancel(); return CancellationCode::CancelSent; From 9e3794f7d73b983a7cf4afb1bad9f01b650bfb71 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 6 Mar 2023 14:36:53 +0000 Subject: [PATCH 238/333] Skip for sanitizer builds --- tests/integration/test_keeper_four_word_command/test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index d3fcfcc3014..412780c8f0f 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -682,6 +682,9 @@ def test_cmd_rqld(started_cluster): def test_cmd_clrs(started_cluster): + if node1.is_built_with_sanitizer(): + return + def get_memory_purges(): return node1.query( "SELECT value FROM system.events WHERE event = 'MemoryAllocatorPurge' SETTINGS system_events_show_zero_values = 1" From d8e5fb519557041dc03c64026775329720da3d2a Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 6 Mar 2023 15:12:15 +0000 Subject: [PATCH 239/333] Use switch, use lz4 by default --- src/Core/Settings.h | 6 ++-- src/Core/SettingsChangesHistory.h | 5 ++- .../Formats/Impl/ORCBlockOutputFormat.cpp | 32 ++++++++--------- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 36 +++++++++---------- 4 files changed, 39 insertions(+), 40 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8eaecff9708..ae5d5326031 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -858,7 +858,7 @@ class IColumn; M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \ M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \ M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \ - M(ParquetCompression, output_format_parquet_compression_method, "snappy", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \ + M(ParquetCompression, output_format_parquet_compression_method, "lz4", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ @@ -901,10 +901,10 @@ class IColumn; M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ M(Bool, output_format_arrow_string_as_string, false, "Use Arrow String type instead of Binary for String columns", 0) \ M(Bool, output_format_arrow_fixed_string_as_fixed_byte_array, true, "Use Arrow FIXED_SIZE_BINARY type instead of Binary for FixedString columns.", 0) \ - M(ArrowCompression, output_format_arrow_compression_method, "none", "Compression method for Arrow output format. 
Supported codecs: lz4_frame, zstd, none (uncompressed)", 0) \ + M(ArrowCompression, output_format_arrow_compression_method, "lz4_frame", "Compression method for Arrow output format. Supported codecs: lz4_frame, zstd, none (uncompressed)", 0) \ \ M(Bool, output_format_orc_string_as_string, false, "Use ORC String type instead of Binary for String columns", 0) \ - M(ORCCompression, output_format_orc_compression_method, "none", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \ + M(ORCCompression, output_format_orc_compression_method, "lz4", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \ \ M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \ \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 04f328bb665..91b3dff3141 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -81,7 +81,10 @@ namespace SettingsChangesHistory static std::map settings_changes_history = { {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, + {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, + {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, + {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}}}, {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 39cacde94ed..4264e9da4d6 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -23,7 +23,7 @@ #include namespace DB -{ +{7 namespace ErrorCodes { @@ -36,24 +36,22 @@ namespace orc::CompressionKind getORCCompression(FormatSettings::ORCCompression method) { - if (method == FormatSettings::ORCCompression::NONE) - return orc::CompressionKind::CompressionKind_NONE; - + switch (method) + { + case FormatSettings::ORCCompression::NONE: + return orc::CompressionKind::CompressionKind_NONE; + case FormatSettings::ORCCompression::SNAPPY: #if USE_SNAPPY - if (method == FormatSettings::ORCCompression::SNAPPY) - return orc::CompressionKind::CompressionKind_SNAPPY; + return orc::CompressionKind::CompressionKind_SNAPPY; #endif - - if (method == FormatSettings::ORCCompression::ZSTD) - return orc::CompressionKind::CompressionKind_ZSTD; - - if (method == FormatSettings::ORCCompression::LZ4) - return orc::CompressionKind::CompressionKind_LZ4; - - if (method == FormatSettings::ORCCompression::ZLIB) - return orc::CompressionKind::CompressionKind_ZLIB; - 
- throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Snappy compression method is not supported"); + case FormatSettings::ORCCompression::ZSTD: + return orc::CompressionKind::CompressionKind_ZSTD; + case FormatSettings::ORCCompression::LZ4: + return orc::CompressionKind::CompressionKind_LZ4; + case FormatSettings::ORCCompression::ZLIB: + return orc::CompressionKind::CompressionKind_ZLIB; + } } } diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index cedd8a9c54c..3695bb9d110 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -37,29 +37,27 @@ static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & se parquet::Compression::type getParquetCompression(FormatSettings::ParquetCompression method) { - if (method == FormatSettings::ParquetCompression::NONE) - return parquet::Compression::type::UNCOMPRESSED; - + switch (method) + { + case FormatSettings::ParquetCompression::NONE: + return parquet::Compression::type::UNCOMPRESSED; + case FormatSettings::ParquetCompression::SNAPPY: #if USE_SNAPPY - if (method == FormatSettings::ParquetCompression::SNAPPY) - return parquet::Compression::type::SNAPPY; + return parquet::Compression::type::SNAPPY; #endif - + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Snappy compression method is not supported"); + case FormatSettings::ParquetCompression::BROTLI: #if USE_BROTLI - if (method == FormatSettings::ParquetCompression::BROTLI) - return parquet::Compression::type::BROTLI; + return parquet::Compression::type::BROTLI; #endif - - if (method == FormatSettings::ParquetCompression::ZSTD) - return parquet::Compression::type::ZSTD; - - if (method == FormatSettings::ParquetCompression::LZ4) - return parquet::Compression::type::LZ4; - - if (method == FormatSettings::ParquetCompression::GZIP) - return parquet::Compression::type::GZIP; - - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Brotli compression method is not supported"); + case FormatSettings::ParquetCompression::ZSTD: + return parquet::Compression::type::ZSTD; + case FormatSettings::ParquetCompression::LZ4: + return parquet::Compression::type::LZ4; + case FormatSettings::ParquetCompression::GZIP: + return parquet::Compression::type::GZIP; + } } } From 1b0d0c61082574733c7281a5511184dc98a6a59d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 6 Mar 2023 15:29:13 +0000 Subject: [PATCH 240/333] fix skip indexes --- src/Storages/IndicesDescription.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 591a9082f7a..a93ac248c98 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -96,8 +96,11 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast result.expression = ExpressionAnalyzer(expr_list, syntax, context).getActions(true); result.sample_block = result.expression->getSampleBlock(); - for (const auto & elem : result.sample_block) + for (auto & elem : result.sample_block) { + if (!elem.column) + elem.column = elem.type->createColumn(); + result.column_names.push_back(elem.name); result.data_types.push_back(elem.type); } From 3076e929802c56fa9e38770f56a02e0622233e02 Mon Sep 17 00:00:00 2001 From: Vladimir C 
Date: Mon, 6 Mar 2023 16:56:02 +0100 Subject: [PATCH 241/333] Apply suggestions from code review Co-authored-by: Igor Nikonov <954088+devcrafter@users.noreply.github.com> --- src/Interpreters/GraceHashJoin.h | 1 - src/Processors/Transforms/JoiningTransform.cpp | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index cbc0f2f3266..b8d83f4cad0 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -140,7 +140,6 @@ private: FileBucket * current_bucket = nullptr; - /// Function `getDelayedBlocks` is a critical section, we process only one bucket at a time. mutable std::mutex current_bucket_mutex; InMemoryJoinPtr hash_join; diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 120ff51cad1..bba8ec6fa16 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -328,7 +328,10 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() } if (!output.canPush()) + { + input.setNotNeeded(); return Status::PortFull; + } if (inputs.size() != 1 && outputs.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have exactly one input port"); @@ -419,7 +422,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (output.isFinished()) { /// If at least one output is finished, then we have read all data from buckets. - /// Some workers can still can busy with joining the last chunk of data in memory, + /// Some workers can still be busy with joining the last chunk of data in memory, /// but after that they also will finish when they will try to get next chunk. finished = true; continue; From 8664a20a9e1bcca40616aba9fc7aff3d938a585c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 6 Mar 2023 17:31:27 +0100 Subject: [PATCH 242/333] Fix fast tests --- tests/queries/0_stateless/02480_tlp_nan.reference | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/queries/0_stateless/02480_tlp_nan.reference b/tests/queries/0_stateless/02480_tlp_nan.reference index ea4aa44fa89..befd1f66564 100644 --- a/tests/queries/0_stateless/02480_tlp_nan.reference +++ b/tests/queries/0_stateless/02480_tlp_nan.reference @@ -1,10 +1,21 @@ +-- {echo} +SELECT sqrt(-1) as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; nan 0 1 0 +SELECT sqrt(-1) as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; nan 0 1 0 +SELECT -inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; -inf 0 1 0 +SELECT -inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; -inf 0 1 0 +SELECT NULL as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; \N \N \N 1 +SELECT NULL as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; \N \N \N 1 +SELECT inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; inf 0 1 0 +SELECT inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; inf 0 1 0 +SELECT nan as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; nan 0 1 0 +SELECT nan as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; nan 0 1 0 From 4bc443f3c757a7c3c3e61b9699b4cc57c468c178 Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Fri, 
3 Mar 2023 01:27:54 +0300 Subject: [PATCH 243/333] initial solution --- base/base/TypeList.h | 3 +- base/base/TypePair.h | 4 - src/Common/StackTrace.cpp | 516 ++++++++---------- src/Common/StackTrace.h | 13 +- src/Daemon/BaseDaemon.cpp | 2 +- src/DataTypes/DataTypeLowCardinality.cpp | 2 +- src/Functions/FunctionsEmbeddedDictionaries.h | 38 +- src/Functions/array/arrayIntersect.cpp | 8 +- src/Interpreters/CrashLog.cpp | 2 +- 9 files changed, 270 insertions(+), 318 deletions(-) delete mode 100644 base/base/TypePair.h diff --git a/base/base/TypeList.h b/base/base/TypeList.h index 244403b1c6b..310f0c0c586 100644 --- a/base/base/TypeList.h +++ b/base/base/TypeList.h @@ -4,7 +4,6 @@ #include #include #include "defines.h" -#include "TypePair.h" /// General-purpose typelist. Easy on compilation times as it does not use recursion. template @@ -28,7 +27,7 @@ namespace TypeListUtils /// In some contexts it's more handy to use functions in constexpr Root changeRoot(TypeList) { return {}; } template - constexpr void forEach(TypeList, F && f) { (std::forward(f)(Id{}), ...); } + constexpr void forEach(TypeList, F && f) { (std::forward(f)(TypeList{}), ...); } } template diff --git a/base/base/TypePair.h b/base/base/TypePair.h deleted file mode 100644 index 8c2f380618c..00000000000 --- a/base/base/TypePair.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -template struct TypePair {}; -template struct Id {}; diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 0d47d3dcb92..eb6696e8c05 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -1,18 +1,21 @@ -#include +#include "StackTrace.h" + +#include +#include +#include #include #include -#include #include -#include +#include #include -#include #include +#include #include #include #include -#include +#include #include "config.h" @@ -20,24 +23,23 @@ # include #endif - namespace { - /// Currently this variable is set up once on server startup. - /// But we use atomic just in case, so it is possible to be modified at runtime. - std::atomic show_addresses = true; +/// Currently this variable is set up once on server startup. +/// But we use atomic just in case, so it is possible to be modified at runtime. +std::atomic show_addresses = true; - bool shouldShowAddress(const void * addr) - { - /// If the address is less than 4096, most likely it is a nullptr dereference with offset, - /// and showing this offset is secure nevertheless. - /// NOTE: 4096 is the page size on x86 and it can be different on other systems, - /// but for the purpose of this branch, it does not matter. - if (reinterpret_cast(addr) < 4096) - return true; +bool shouldShowAddress(const void * addr) +{ + /// If the address is less than 4096, most likely it is a nullptr dereference with offset, + /// and showing this offset is secure nevertheless. + /// NOTE: 4096 is the page size on x86 and it can be different on other systems, + /// but for the purpose of this branch, it does not matter. + if (reinterpret_cast(addr) < 4096) + return true; - return show_addresses.load(std::memory_order_relaxed); - } + return show_addresses.load(std::memory_order_relaxed); +} } void StackTrace::setShowAddresses(bool show) @@ -45,155 +47,129 @@ void StackTrace::setShowAddresses(bool show) show_addresses.store(show, std::memory_order_relaxed); } +std::string SigsegvErrorString(const siginfo_t & info, [[maybe_unused]] const ucontext_t & context) +{ + using namespace std::string_literals; + std::string address + = info.si_addr == nullptr ? 
"NULL pointer"s : (shouldShowAddress(info.si_addr) ? fmt::format("{}", info.si_addr) : ""s); + + const std::string_view access = +#if defined(__x86_64__) && !defined(OS_FREEBSD) && !defined(OS_DARWIN) && !defined(__arm__) && !defined(__powerpc__) + (context.uc_mcontext.gregs[REG_ERR] & 0x02) ? "write" : "read"; +#else + ""; +#endif + + std::string_view message; + + switch (info.si_code) + { + case SEGV_ACCERR: + message = "Attempted access has violated the permissions assigned to the memory area"; + break; + case SEGV_MAPERR: + message = "Address not mapped to object"; + break; + default: + message = "Unknown si_code"; + break; + } + + return fmt::format("Address: {}. Access: {}. {}.", std::move(address), access, message); +} + +constexpr std::string_view SigbusErrorString(int si_code) +{ + switch (si_code) + { + case BUS_ADRALN: + return "Invalid address alignment."; + case BUS_ADRERR: + return "Non-existent physical address."; + case BUS_OBJERR: + return "Object specific hardware error."; + + // Linux specific +#if defined(BUS_MCEERR_AR) + case BUS_MCEERR_AR: + return "Hardware memory error: action required."; +#endif +#if defined(BUS_MCEERR_AO) + case BUS_MCEERR_AO: + return "Hardware memory error: action optional."; +#endif + default: + return "Unknown si_code."; + } +} + +constexpr std::string_view SigfpeErrorString(int si_code) +{ + switch (si_code) + { + case FPE_INTDIV: + return "Integer divide by zero."; + case FPE_INTOVF: + return "Integer overflow."; + case FPE_FLTDIV: + return "Floating point divide by zero."; + case FPE_FLTOVF: + return "Floating point overflow."; + case FPE_FLTUND: + return "Floating point underflow."; + case FPE_FLTRES: + return "Floating point inexact result."; + case FPE_FLTINV: + return "Floating point invalid operation."; + case FPE_FLTSUB: + return "Subscript out of range."; + default: + return "Unknown si_code."; + } +} + +constexpr std::string_view SigillErrorString(int si_code) +{ + switch (si_code) + { + case ILL_ILLOPC: + return "Illegal opcode."; + case ILL_ILLOPN: + return "Illegal operand."; + case ILL_ILLADR: + return "Illegal addressing mode."; + case ILL_ILLTRP: + return "Illegal trap."; + case ILL_PRVOPC: + return "Privileged opcode."; + case ILL_PRVREG: + return "Privileged register."; + case ILL_COPROC: + return "Coprocessor error."; + case ILL_BADSTK: + return "Internal stack error."; + default: + return "Unknown si_code."; + } +} std::string signalToErrorMessage(int sig, const siginfo_t & info, [[maybe_unused]] const ucontext_t & context) { - std::stringstream error; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - error.exceptions(std::ios::failbit); switch (sig) { case SIGSEGV: - { - /// Print info about address and reason. 
- if (nullptr == info.si_addr) - error << "Address: NULL pointer."; - else if (shouldShowAddress(info.si_addr)) - error << "Address: " << info.si_addr; - -#if defined(__x86_64__) && !defined(OS_FREEBSD) && !defined(OS_DARWIN) && !defined(__arm__) && !defined(__powerpc__) - auto err_mask = context.uc_mcontext.gregs[REG_ERR]; - if ((err_mask & 0x02)) - error << " Access: write."; - else - error << " Access: read."; -#endif - - switch (info.si_code) - { - case SEGV_ACCERR: - error << " Attempted access has violated the permissions assigned to the memory area."; - break; - case SEGV_MAPERR: - error << " Address not mapped to object."; - break; - default: - error << " Unknown si_code."; - break; - } - break; - } - + return SigsegvErrorString(info, context); case SIGBUS: - { - switch (info.si_code) - { - case BUS_ADRALN: - error << "Invalid address alignment."; - break; - case BUS_ADRERR: - error << "Non-existent physical address."; - break; - case BUS_OBJERR: - error << "Object specific hardware error."; - break; - - // Linux specific -#if defined(BUS_MCEERR_AR) - case BUS_MCEERR_AR: - error << "Hardware memory error: action required."; - break; -#endif -#if defined(BUS_MCEERR_AO) - case BUS_MCEERR_AO: - error << "Hardware memory error: action optional."; - break; -#endif - - default: - error << "Unknown si_code."; - break; - } - break; - } - + return std::string{SigbusErrorString(info.si_code)}; case SIGILL: - { - switch (info.si_code) - { - case ILL_ILLOPC: - error << "Illegal opcode."; - break; - case ILL_ILLOPN: - error << "Illegal operand."; - break; - case ILL_ILLADR: - error << "Illegal addressing mode."; - break; - case ILL_ILLTRP: - error << "Illegal trap."; - break; - case ILL_PRVOPC: - error << "Privileged opcode."; - break; - case ILL_PRVREG: - error << "Privileged register."; - break; - case ILL_COPROC: - error << "Coprocessor error."; - break; - case ILL_BADSTK: - error << "Internal stack error."; - break; - default: - error << "Unknown si_code."; - break; - } - break; - } - + return std::string{SigillErrorString(info.si_code)}; case SIGFPE: - { - switch (info.si_code) - { - case FPE_INTDIV: - error << "Integer divide by zero."; - break; - case FPE_INTOVF: - error << "Integer overflow."; - break; - case FPE_FLTDIV: - error << "Floating point divide by zero."; - break; - case FPE_FLTOVF: - error << "Floating point overflow."; - break; - case FPE_FLTUND: - error << "Floating point underflow."; - break; - case FPE_FLTRES: - error << "Floating point inexact result."; - break; - case FPE_FLTINV: - error << "Floating point invalid operation."; - break; - case FPE_FLTSUB: - error << "Subscript out of range."; - break; - default: - error << "Unknown si_code."; - break; - } - break; - } - + return std::string{SigfpeErrorString(info.si_code)}; case SIGTSTP: - { - error << "This is a signal used for debugging purposes by the user."; - break; - } + return "This is a signal used for debugging purposes by the user."; + default: + return ""; } - - return error.str(); } static void * getCallerAddress(const ucontext_t & context) @@ -207,10 +183,8 @@ static void * getCallerAddress(const ucontext_t & context) # else return reinterpret_cast(context.uc_mcontext.gregs[REG_RIP]); # endif - #elif defined(OS_DARWIN) && defined(__aarch64__) return reinterpret_cast(context.uc_mcontext->__ss.__pc); - #elif defined(OS_FREEBSD) && defined(__aarch64__) return reinterpret_cast(context.uc_mcontext.mc_gpregs.gp_elr); #elif defined(__aarch64__) @@ -228,20 +202,17 @@ static void * getCallerAddress(const ucontext_t & 
context) #endif } +// FIXME: looks like this is used only for Sentry but duplicates the whole algo, maybe replace? void StackTrace::symbolize( - const StackTrace::FramePointers & frame_pointers, [[maybe_unused]] size_t offset, - size_t size, StackTrace::Frames & frames) + const StackTrace::FramePointers & frame_pointers, [[maybe_unused]] size_t offset, size_t size, StackTrace::Frames & frames) { #if defined(__ELF__) && !defined(OS_FREEBSD) - auto symbol_index_ptr = DB::SymbolIndex::instance(); const DB::SymbolIndex & symbol_index = *symbol_index_ptr; std::unordered_map dwarfs; for (size_t i = 0; i < offset; ++i) - { frames[i].virtual_addr = frame_pointers[i]; - } for (size_t i = offset; i < size; ++i) { @@ -254,7 +225,7 @@ void StackTrace::symbolize( if (object) { current_frame.object = object->name; - if (std::filesystem::exists(current_frame.object.value())) + if (std::error_code ec; std::filesystem::exists(current_frame.object.value(), ec) && !ec) { auto dwarf_it = dwarfs.try_emplace(object->name, object->elf).first; @@ -269,34 +240,19 @@ void StackTrace::symbolize( } } else - { current_frame.object = "?"; - } - const auto * symbol = symbol_index.findSymbol(current_frame.virtual_addr); - if (symbol) - { - int status = 0; - current_frame.symbol = demangle(symbol->name, status); - } + if (const auto * symbol = symbol_index.findSymbol(current_frame.virtual_addr)) + current_frame.symbol = demangle(symbol->name); else - { current_frame.symbol = "?"; - } } #else for (size_t i = 0; i < size; ++i) - { frames[i].virtual_addr = frame_pointers[i]; - } #endif } -StackTrace::StackTrace() -{ - tryCapture(); -} - StackTrace::StackTrace(const ucontext_t & signal_context) { tryCapture(); @@ -325,81 +281,97 @@ StackTrace::StackTrace(const ucontext_t & signal_context) } } -StackTrace::StackTrace(NoCapture) -{ -} - void StackTrace::tryCapture() { - size = 0; #if USE_UNWIND size = unw_backtrace(frame_pointers.data(), capacity); __msan_unpoison(frame_pointers.data(), size * sizeof(frame_pointers[0])); +#else + size = 0; #endif } -size_t StackTrace::getSize() const +// Clickhouse uses bundled libc++ so type names will be same on every system thus it's save to hardcode them +constexpr std::pair replacements[] + = {{"::__1", ""}, {"std::basic_string, std::allocator>", "String"}}; + +String collapseNames(String && haystack) { - return size; + // TODO: surely there is a written version already for better in place search&replace + for (auto [needle, to] : replacements) + { + size_t pos = 0; + while ((pos = haystack.find(needle, pos)) != std::string::npos) + { + haystack.replace(pos, needle.length(), to); + pos += to.length(); + } + } + + return haystack; } -size_t StackTrace::getOffset() const +struct StackTraceRefTriple { - return offset; + const StackTrace::FramePointers & pointers; + size_t offset; + size_t size; +}; + +struct StackTraceTriple +{ + StackTrace::FramePointers pointers; + size_t offset; + size_t size; +}; + +template +concept MaybeRef = std::is_same_v || std::is_same_v; + +constexpr bool operator<(const MaybeRef auto & left, const MaybeRef auto & right) +{ + return std::tuple{left.pointers, left.size, left.offset} < std::tuple{right.pointers, right.size, right.offset}; } -const StackTrace::FramePointers & StackTrace::getFramePointers() const +static void +toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & stack_trace, Fn auto && callback) { - return frame_pointers; -} - -static void toStringEveryLineImpl( - [[maybe_unused]] bool fatal, - const StackTrace::FramePointers 
& frame_pointers, - size_t offset, - size_t size, - std::function callback) -{ - if (size == 0) + if (stack_trace.size == 0) return callback(""); #if defined(__ELF__) && !defined(OS_FREEBSD) - auto symbol_index_ptr = DB::SymbolIndex::instance(); - const DB::SymbolIndex & symbol_index = *symbol_index_ptr; - std::unordered_map dwarfs; - - std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM out.exceptions(std::ios::failbit); - for (size_t i = offset; i < size; ++i) + using enum DB::Dwarf::LocationInfoMode; + const auto mode = fatal ? FULL_WITH_INLINE : FAST; + + auto symbol_index_ptr = DB::SymbolIndex::instance(); + const DB::SymbolIndex & symbol_index = *symbol_index_ptr; + std::unordered_map dwarfs; + + for (size_t i = stack_trace.offset; i < stack_trace.size; ++i) { std::vector inline_frames; - const void * virtual_addr = frame_pointers[i]; + const void * virtual_addr = stack_trace.pointers[i]; const auto * object = symbol_index.findObject(virtual_addr); uintptr_t virtual_offset = object ? uintptr_t(object->address_begin) : 0; const void * physical_addr = reinterpret_cast(uintptr_t(virtual_addr) - virtual_offset); out << i << ". "; - if (object) + if (std::error_code ec; object && std::filesystem::exists(object->name, ec) && !ec) { - if (std::filesystem::exists(object->name)) - { - auto dwarf_it = dwarfs.try_emplace(object->name, object->elf).first; + auto dwarf_it = dwarfs.try_emplace(object->name, object->elf).first; - DB::Dwarf::LocationInfo location; - auto mode = fatal ? DB::Dwarf::LocationInfoMode::FULL_WITH_INLINE : DB::Dwarf::LocationInfoMode::FAST; - if (dwarf_it->second.findAddress(uintptr_t(physical_addr), location, mode, inline_frames)) - out << location.file.toString() << ":" << location.line << ": "; - } + DB::Dwarf::LocationInfo location; + + if (dwarf_it->second.findAddress(uintptr_t(physical_addr), location, mode, inline_frames)) + out << location.file.toString() << ":" << location.line << ": "; } - const auto * symbol = symbol_index.findSymbol(virtual_addr); - if (symbol) - { - int status = 0; - out << demangle(symbol->name, status); - } + if (const auto * const symbol = symbol_index.findSymbol(virtual_addr)) + out << collapseNames(demangle(symbol->name)); else out << "?"; @@ -411,64 +383,31 @@ static void toStringEveryLineImpl( for (size_t j = 0; j < inline_frames.size(); ++j) { const auto & frame = inline_frames[j]; - int status = 0; - callback(fmt::format("{}.{}. inlined from {}:{}: {}", - i, j+1, frame.location.file.toString(), frame.location.line, demangle(frame.name, status))); + callback(fmt::format( + "{}.{}. inlined from {}:{}: {}", + i, + j + 1, + frame.location.file.toString(), + frame.location.line, + collapseNames(demangle(frame.name)))); } callback(out.str()); out.str({}); } #else - std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - out.exceptions(std::ios::failbit); - - for (size_t i = offset; i < size; ++i) - { - const void * addr = frame_pointers[i]; - if (shouldShowAddress(addr)) - { - out << i << ". " << addr; - - callback(out.str()); - out.str({}); - } - } + for (size_t i = stack_trace.offset; i < stack_trace.size; ++i) + if (const void * const addr = stack_trace.pointers[i]; shouldShowAddress(addr)) + callback(fmt::format("{}. 
{}", i, addr)); #endif } -static std::string toStringImpl(const StackTrace::FramePointers & frame_pointers, size_t offset, size_t size) +void StackTrace::toStringEveryLine(std::function callback) const { - std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - out.exceptions(std::ios::failbit); - toStringEveryLineImpl(false, frame_pointers, offset, size, [&](const std::string & str) { out << str << '\n'; }); - return out.str(); + toStringEveryLineImpl(true, {frame_pointers, offset, size}, std::move(callback)); } -void StackTrace::toStringEveryLine(std::function callback) const -{ - toStringEveryLineImpl(true, frame_pointers, offset, size, std::move(callback)); -} - - -std::string StackTrace::toString() const -{ - return toStringStatic(frame_pointers, offset, size); -} - -std::string StackTrace::toString(void ** frame_pointers_, size_t offset, size_t size) -{ - __msan_unpoison(frame_pointers_, size * sizeof(*frame_pointers_)); - - StackTrace::FramePointers frame_pointers_copy{}; - for (size_t i = 0; i < size; ++i) - frame_pointers_copy[i] = frame_pointers_[i]; - - return toStringStatic(frame_pointers_copy, offset, size); -} - -using StackTraceRepresentation = std::tuple; -using StackTraceCache = std::map; +using StackTraceCache = std::map>; static StackTraceCache & cacheInstance() { @@ -478,21 +417,40 @@ static StackTraceCache & cacheInstance() static std::mutex stacktrace_cache_mutex; -std::string StackTrace::toStringStatic(const StackTrace::FramePointers & frame_pointers, size_t offset, size_t size) +String toStringCached(const StackTrace::FramePointers & pointers, size_t offset, size_t size) { /// Calculation of stack trace text is extremely slow. /// We use simple cache because otherwise the server could be overloaded by trash queries. /// Note that this cache can grow unconditionally, but practically it should be small. std::lock_guard lock{stacktrace_cache_mutex}; - StackTraceRepresentation key{frame_pointers, offset, size}; - auto & cache = cacheInstance(); - if (cache.contains(key)) - return cache[key]; + StackTraceCache & cache = cacheInstance(); - auto result = toStringImpl(frame_pointers, offset, size); - cache[key] = result; - return result; + if (auto it = cache.find(StackTraceRefTriple{pointers, offset, size}); it != cache.end()) + return it->second; + else + { + std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + out.exceptions(std::ios::failbit); + toStringEveryLineImpl(false, {pointers, offset, size}, [&](std::string_view str) { out << str << '\n'; }); + + return cache.emplace(StackTraceTriple{pointers, offset, size}, out.str()).first->second; + } +} + +std::string StackTrace::toString() const +{ + return toStringCached(frame_pointers, offset, size); +} + +std::string StackTrace::toString(void ** frame_pointers_raw, size_t offset, size_t size) +{ + __msan_unpoison(frame_pointers_raw, size * sizeof(*frame_pointers_raw)); + + StackTrace::FramePointers frame_pointers; + std::copy_n(frame_pointers_raw, size, frame_pointers.begin()); + + return toStringCached(frame_pointers, offset, size); } void StackTrace::dropCache() diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index f07c05107ee..3940c880c5b 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -46,26 +46,25 @@ public: using Frames = std::array; /// Tries to capture stack trace - StackTrace(); + inline StackTrace() { tryCapture(); } /// Tries to capture stack trace. 
Fallbacks on parsing caller address from /// signal context if no stack trace could be captured explicit StackTrace(const ucontext_t & signal_context); /// Creates empty object for deferred initialization - explicit StackTrace(NoCapture); + explicit inline StackTrace(NoCapture) {} - size_t getSize() const; - size_t getOffset() const; - const FramePointers & getFramePointers() const; + constexpr size_t getSize() const { return size; } + constexpr size_t getOffset() const { return offset; } + const FramePointers & getFramePointers() const { return frame_pointers; } std::string toString() const; static std::string toString(void ** frame_pointers, size_t offset, size_t size); - static std::string toStringStatic(const FramePointers & frame_pointers, size_t offset, size_t size); static void dropCache(); static void symbolize(const FramePointers & frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames); - void toStringEveryLine(std::function callback) const; + void toStringEveryLine(std::function callback) const; /// Displaying the addresses can be disabled for security reasons. /// If you turn off addresses, it will be more secure, but we will be unable to help you with debugging. diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index fbfa9e68774..18c4c0d97a0 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -374,7 +374,7 @@ private: } /// Write symbolized stack trace line by line for better grep-ability. - stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, fmt::runtime(s)); }); + stack_trace.toStringEveryLine([&](std::string_view s) { LOG_FATAL(log, fmt::runtime(s)); }); #if defined(OS_LINUX) /// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace. 
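
The StackTrace.cpp cache introduced above stores an owning StackTraceTriple as the map key but performs lookups with a non-owning StackTraceRefTriple; this works because std::less<> is a transparent comparator and the shared operator< is defined over both key types, so a lookup never has to copy the whole frame-pointer array. Below is a minimal, self-contained sketch of that heterogeneous-lookup pattern (the Pointers/OwningKey/RefKey names are illustrative stand-ins, not identifiers from the patch; requires C++20):

#include <array>
#include <concepts>
#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <tuple>

using Pointers = std::array<const void *, 4>;

struct OwningKey                /// stored inside the map, owns its array
{
    Pointers pointers{};
    size_t offset = 0;
    size_t size = 0;
};

struct RefKey                   /// built on the stack for lookups, only borrows the array
{
    const Pointers & pointers;
    size_t offset;
    size_t size;
};

template <typename T>
concept AnyKey = std::same_as<T, OwningKey> || std::same_as<T, RefKey>;

/// One ordering covers owning/owning, owning/ref and ref/owning comparisons.
bool operator<(const AnyKey auto & l, const AnyKey auto & r)
{
    return std::tuple{l.pointers, l.offset, l.size} < std::tuple{r.pointers, r.offset, r.size};
}

int main()
{
    /// std::less<> is transparent, so find() accepts a RefKey directly
    /// and never materializes (or copies the array into) an OwningKey.
    std::map<OwningKey, std::string, std::less<>> cache;

    int marker = 0;
    Pointers frames{&marker, nullptr, nullptr, nullptr};

    cache.emplace(OwningKey{frames, 0, 1}, "cached trace text");

    if (auto it = cache.find(RefKey{frames, 0, 1}); it != cache.end())
        std::cout << it->second << '\n';    /// cache hit without constructing an OwningKey
}

The same idea is what lets toStringCached() above probe the cache with a StackTraceRefTriple and only pay for constructing an owning StackTraceTriple when a new entry is actually inserted.
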
diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index 5e3a1cd3a0e..8293455cabc 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ b/src/DataTypes/DataTypeLowCardinality.cpp @@ -55,7 +55,7 @@ namespace } template - void operator()(Id) + void operator()(TypeList) { if (typeid_cast *>(&keys_type)) column = creator(static_cast *>(nullptr)); diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h index e54ab0277d5..af040c6ab93 100644 --- a/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/src/Functions/FunctionsEmbeddedDictionaries.h @@ -166,12 +166,12 @@ public: if (arguments[0]->getName() != TypeName) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {} (must be {})", - arguments[0]->getName(), getName(), String(TypeName)); + arguments[0]->getName(), getName(), TypeName); - if (arguments.size() == 2 && arguments[1]->getName() != TypeName) + if (arguments.size() == 2 && arguments[1]->getName() != "String") throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of the second ('point of view') argument of function {} (must be {})", - arguments[1]->getName(), getName(), String(TypeName)); + "Illegal type {} of the second ('point of view') argument of function {} (must be String)", + arguments[1]->getName(), getName()); return arguments[0]; } @@ -257,16 +257,16 @@ public: if (arguments[0]->getName() != TypeName) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {} (must be {})", - arguments[0]->getName(), getName(), String(TypeName)); + arguments[0]->getName(), getName(), TypeName); if (arguments[1]->getName() != TypeName) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {} (must be {})", - arguments[1]->getName(), getName(), String(TypeName)); + arguments[1]->getName(), getName(), TypeName); - if (arguments.size() == 3 && arguments[2]->getName() != TypeName) + if (arguments.size() == 3 && arguments[2]->getName() != "String") throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of the third ('point of view') argument of function {} (must be {})", - arguments[2]->getName(), getName(), String(TypeName)); + "Illegal type {} of the third ('point of view') argument of function {} (must be String)", + arguments[2]->getName(), getName()); return std::make_shared(); } @@ -390,12 +390,12 @@ public: if (arguments[0]->getName() != TypeName) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {} (must be {})", - arguments[0]->getName(), getName(), String(TypeName)); + arguments[0]->getName(), getName(), TypeName); - if (arguments.size() == 2 && arguments[1]->getName() != TypeName) + if (arguments.size() == 2 && arguments[1]->getName() != "String") throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of the second ('point of view') argument of function {} (must be {})", - arguments[1]->getName(), getName(), String(TypeName)); + "Illegal type {} of the second ('point of view') argument of function {} (must be String)", + arguments[1]->getName(), getName()); return std::make_shared(arguments[0]); } @@ -591,15 +591,15 @@ public: "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.", getName(), arguments.size()); - if (arguments[0]->getName() != TypeName) + if (arguments[0]->getName() != "UInt32") throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of the first argument of function {} (must be {})", - arguments[0]->getName(), getName(), String(TypeName)); + "Illegal type {} of the first argument of function {} (must be UInt32)", + arguments[0]->getName(), getName()); - if (arguments.size() == 2 && arguments[1]->getName() != TypeName) + if (arguments.size() == 2 && arguments[1]->getName() != "String") throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of the second argument of function {} (must be {})", - arguments[0]->getName(), getName(), String(TypeName)); + "Illegal type {} of the second argument of function {} (must be String)", + arguments[0]->getName(), getName()); return std::make_shared(); } diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 1a718595a3b..c6f0a5afa62 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -107,7 +107,7 @@ private: : arrays(arrays_), data_type(data_type_), result(result_) {} template - void operator()(Id); + void operator()(TypeList); }; struct DecimalExecutor @@ -120,7 +120,7 @@ private: : arrays(arrays_), data_type(data_type_), result(result_) {} template - void operator()(Id); + void operator()(TypeList); }; }; @@ -446,7 +446,7 @@ ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName & arg } template -void FunctionArrayIntersect::NumberExecutor::operator()(Id) +void FunctionArrayIntersect::NumberExecutor::operator()(TypeList) { using Container = ClearableHashMapWithStackMemory, INITIAL_SIZE_DEGREE>; @@ -456,7 +456,7 @@ void FunctionArrayIntersect::NumberExecutor::operator()(Id) } template -void FunctionArrayIntersect::DecimalExecutor::operator()(Id) +void FunctionArrayIntersect::DecimalExecutor::operator()(TypeList) { using Container = ClearableHashMapWithStackMemory, INITIAL_SIZE_DEGREE>; diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index 47a9d62fba6..f1f0ffb6f60 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -80,7 +80,7 @@ void collectCrashLog(Int32 signal, UInt64 thread_id, const String & query_id, co for (size_t i = stack_trace_offset; i < stack_trace_size; ++i) trace.push_back(reinterpret_cast(stack_trace.getFramePointers()[i])); - stack_trace.toStringEveryLine([&trace_full](const std::string & line) { trace_full.push_back(line); }); + stack_trace.toStringEveryLine([&trace_full](std::string_view line) { trace_full.push_back(line); }); CrashLogElement element{static_cast(time / 1000000000), time, signal, thread_id, query_id, trace, trace_full}; crash_log_owned->add(element); From 07a9e486e6aafa184d7a3ea244db0758137128c7 Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Sat, 4 Mar 2023 21:37:28 +0300 Subject: [PATCH 244/333] memset for frame pointers' --- src/Common/StackTrace.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index eb6696e8c05..0aebaf9801c 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -425,14 +425,15 @@ String toStringCached(const StackTrace::FramePointers & pointers, size_t offset, std::lock_guard lock{stacktrace_cache_mutex}; StackTraceCache & cache = cacheInstance(); + const StackTraceRefTriple key{pointers, offset, size}; - if (auto it = cache.find(StackTraceRefTriple{pointers, offset, size}); it != cache.end()) + if (auto it = cache.find(key); it != cache.end()) return it->second; else { - 
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + std::ostringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM out.exceptions(std::ios::failbit); - toStringEveryLineImpl(false, {pointers, offset, size}, [&](std::string_view str) { out << str << '\n'; }); + toStringEveryLineImpl(false, key, [&](std::string_view str) { out << str << '\n'; }); return cache.emplace(StackTraceTriple{pointers, offset, size}, out.str()).first->second; } @@ -447,7 +448,7 @@ std::string StackTrace::toString(void ** frame_pointers_raw, size_t offset, size { __msan_unpoison(frame_pointers_raw, size * sizeof(*frame_pointers_raw)); - StackTrace::FramePointers frame_pointers; + StackTrace::FramePointers frame_pointers{}; std::copy_n(frame_pointers_raw, size, frame_pointers.begin()); return toStringCached(frame_pointers, offset, size); From e50fd6e1ef67eaf7c69e173b9a4486c90e598f1f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 6 Mar 2023 16:35:17 +0000 Subject: [PATCH 245/333] Update version_date.tsv and changelogs after v23.2.3.17-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.2.3.17-stable.md | 23 +++++++++++++++++++++++ utils/list-versions/version_date.tsv | 5 +++++ 5 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.2.3.17-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 09395befdad..500249b5bd6 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -29,7 +29,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.2.3.17" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 472f25eed2d..5f613eea4d0 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.2.3.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 5dbb244c298..3e99bca8bc2 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.2.3.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.2.3.17-stable.md b/docs/changelogs/v23.2.3.17-stable.md new file mode 100644 index 00000000000..fb2c4e394dc --- /dev/null +++ b/docs/changelogs/v23.2.3.17-stable.md @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.3.17-stable (dec18bf7281) FIXME as compared to v23.2.2.20-stable (f6c269c8df2) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#46907](https://github.com/ClickHouse/ClickHouse/issues/46907): - Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#47091](https://github.com/ClickHouse/ClickHouse/issues/47091): - Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#46885](https://github.com/ClickHouse/ClickHouse/issues/46885): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#47067](https://github.com/ClickHouse/ClickHouse/issues/47067): Fix typo in systemd service, which causes the systemd service start to fail. [#47051](https://github.com/ClickHouse/ClickHouse/pull/47051) ([Palash Goel](https://github.com/palash-goel)). +* Backported in [#47259](https://github.com/ClickHouse/ClickHouse/issues/47259): Fix concrete columns PREWHERE support. [#47154](https://github.com/ClickHouse/ClickHouse/pull/47154) ([Azat Khuzhin](https://github.com/azat)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* do flushUntrackedMemory when context switches [#47102](https://github.com/ClickHouse/ClickHouse/pull/47102) ([Sema Checherinda](https://github.com/CheSema)). +* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 3814e94bf24..34bc3f646fc 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,7 +1,11 @@ +v23.2.3.17-stable 2023-03-06 +v23.2.2.20-stable 2023-03-01 v23.2.1.2537-stable 2023-02-23 +v23.1.4.58-stable 2023-03-01 v23.1.3.5-stable 2023-02-03 v23.1.2.9-stable 2023-01-29 v23.1.1.3077-stable 2023-01-25 +v22.12.4.76-stable 2023-03-01 v22.12.3.5-stable 2023-01-10 v22.12.2.25-stable 2023-01-06 v22.12.1.1752-stable 2022-12-15 @@ -25,6 +29,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.14.53-lts 2023-02-27 v22.8.13.20-lts 2023-01-29 v22.8.12.45-lts 2023-01-10 v22.8.11.15-lts 2022-12-08 From df34ab240d19d68226b230413d0ae023d98686f7 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 6 Mar 2023 18:03:17 +0100 Subject: [PATCH 246/333] Don't use switch --- .../Formats/Impl/ORCBlockOutputFormat.cpp | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 4264e9da4d6..7fae2c8f480 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -36,22 +36,24 @@ namespace orc::CompressionKind getORCCompression(FormatSettings::ORCCompression method) { - switch (method) - { - case FormatSettings::ORCCompression::NONE: - return orc::CompressionKind::CompressionKind_NONE; - case FormatSettings::ORCCompression::SNAPPY: + if (method == FormatSettings::ORCCompression::NONE) + return orc::CompressionKind::CompressionKind_NONE; + #if USE_SNAPPY - return orc::CompressionKind::CompressionKind_SNAPPY; + if (method == FormatSettings::ORCCompression::SNAPPY) + return orc::CompressionKind::CompressionKind_SNAPPY; #endif - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Snappy compression method is not supported"); - case FormatSettings::ORCCompression::ZSTD: - return orc::CompressionKind::CompressionKind_ZSTD; - case FormatSettings::ORCCompression::LZ4: - return orc::CompressionKind::CompressionKind_LZ4; - case FormatSettings::ORCCompression::ZLIB: - return orc::CompressionKind::CompressionKind_ZLIB; - } + + if (method == FormatSettings::ORCCompression::ZSTD) + return orc::CompressionKind::CompressionKind_ZSTD; + + if (method == FormatSettings::ORCCompression::LZ4) + return orc::CompressionKind::CompressionKind_LZ4; + + if (method == FormatSettings::ORCCompression::ZLIB) + return orc::CompressionKind::CompressionKind_ZLIB; + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); } } From fed645a4945efab6aa9a2b98ac2861de483be158 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 6 Mar 2023 18:03:54 +0100 Subject: [PATCH 247/333] Don't use switch --- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 3695bb9d110..cedd8a9c54c 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -37,27 +37,29 @@ static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & se parquet::Compression::type 
getParquetCompression(FormatSettings::ParquetCompression method) { - switch (method) - { - case FormatSettings::ParquetCompression::NONE: - return parquet::Compression::type::UNCOMPRESSED; - case FormatSettings::ParquetCompression::SNAPPY: + if (method == FormatSettings::ParquetCompression::NONE) + return parquet::Compression::type::UNCOMPRESSED; + #if USE_SNAPPY - return parquet::Compression::type::SNAPPY; + if (method == FormatSettings::ParquetCompression::SNAPPY) + return parquet::Compression::type::SNAPPY; #endif - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Snappy compression method is not supported"); - case FormatSettings::ParquetCompression::BROTLI: + #if USE_BROTLI - return parquet::Compression::type::BROTLI; + if (method == FormatSettings::ParquetCompression::BROTLI) + return parquet::Compression::type::BROTLI; #endif - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Brotli compression method is not supported"); - case FormatSettings::ParquetCompression::ZSTD: - return parquet::Compression::type::ZSTD; - case FormatSettings::ParquetCompression::LZ4: - return parquet::Compression::type::LZ4; - case FormatSettings::ParquetCompression::GZIP: - return parquet::Compression::type::GZIP; - } + + if (method == FormatSettings::ParquetCompression::ZSTD) + return parquet::Compression::type::ZSTD; + + if (method == FormatSettings::ParquetCompression::LZ4) + return parquet::Compression::type::LZ4; + + if (method == FormatSettings::ParquetCompression::GZIP) + return parquet::Compression::type::GZIP; + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); } } From b5dffe7417527bb11ef5979133e092ec1ca222db Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 6 Mar 2023 19:57:09 +0100 Subject: [PATCH 248/333] Fix bug in zero copy replica which can lead to dataloss --- src/Storages/StorageReplicatedMergeTree.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 54ae8aa5a7b..9e79a715610 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8434,7 +8434,11 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else if (error_code == Coordination::Error::ZNONODE) { - LOG_TRACE(logger, "Node with parent zookeeper lock {} for part {} doesn't exist (part was unlocked before)", zookeeper_part_uniq_node, part_name); + /// We don't know what to do, because this part can be mutation part + /// with hardlinked columns. Since we don't have this information (about blobs not to remove) + /// we refuce to remove blobs. 
+ LOG_WARNING(logger, "Node with parent zookeeper lock {} for part {} doesn't exist (part was unlocked before), refuse to remove blobs", zookeeper_part_uniq_node, part_name); + return std::make_pair{false, {}}; } else { From b298af2640f28a0ae440d36a59e72d2dedaa77b0 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Mon, 6 Mar 2023 15:56:21 -0400 Subject: [PATCH 249/333] Update exponentialmovingaverage.md --- .../reference/exponentialmovingaverage.md | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index 2587bc5533f..e99addf187c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -11,15 +11,15 @@ sidebar_title: exponentialMovingAverage **Syntax** ```sql -exponentialMovingAverage(x)(value, timestamp) +exponentialMovingAverage(x)(value, timeunit) ``` -Each `value` corresponds to the determinate `timestamp`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is considered to be. +Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is considered to be. **Arguments** - `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `timestamp` — Timestamp. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not timestamp (seconds), it's -- an index of the time interval. Can be calculated using [intDiv](../../unctions/arithmetic-functions/#intdiva-b). 
**Parameters** @@ -148,3 +148,33 @@ Result: │ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│ └───────┴──────┴──────────────────────┴────────────────────────────────────────────┘ ``` + +```sql +CREATE TABLE data +ENGINE = Memory AS +SELECT + 10 AS value, + toDateTime('2020-01-01') + (3600 * number) AS time +FROM numbers_mt(10); + + +SELECT + value, + time, + exponentialMovingAverage(1)(value, intDiv(toUInt32(time),3600)) OVER (ORDER BY time ASC) res +FROM data +ORDER BY time; + +┌─value─┬────────────────time─┬─────────res─┐ +│ 10 │ 2020-01-01 00:00:00 │ 5 │ +│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ +│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ +│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ +│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ +│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ +│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ +│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ +│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ +│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ +└───────┴─────────────────────┴─────────────┘ +``` From 19dfd2eb17f9aa162801835304a77983e42f8120 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Mon, 6 Mar 2023 15:59:24 -0400 Subject: [PATCH 250/333] Update exponentialmovingaverage.md --- .../reference/exponentialmovingaverage.md | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index e99addf187c..a789a149d1e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -177,4 +177,25 @@ ORDER BY time; │ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ │ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ └───────┴─────────────────────┴─────────────┘ + + +SELECT + value, + time, + round(exponentialMovingAverage(2)(value, toRelativeHourNum(time)) OVER (ORDER BY time ASC), 2) AS res +FROM data +ORDER BY time ASC + +┌─value─┬────────────────time─┬──res─┐ +│ 10 │ 2020-01-01 00:00:00 │ 2.93 │ +│ 10 │ 2020-01-01 01:00:00 │ 5 │ +│ 10 │ 2020-01-01 02:00:00 │ 6.46 │ +│ 10 │ 2020-01-01 03:00:00 │ 7.5 │ +│ 10 │ 2020-01-01 04:00:00 │ 8.23 │ +│ 10 │ 2020-01-01 05:00:00 │ 8.75 │ +│ 10 │ 2020-01-01 06:00:00 │ 9.12 │ +│ 10 │ 2020-01-01 07:00:00 │ 9.37 │ +│ 10 │ 2020-01-01 08:00:00 │ 9.56 │ +│ 10 │ 2020-01-01 09:00:00 │ 9.69 │ +└───────┴─────────────────────┴──────┘ ``` From 683da4897ca5bd094089588ac7d8e61319d4e9aa Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Mon, 6 Mar 2023 16:04:36 -0400 Subject: [PATCH 251/333] Update exponentialmovingaverage.md --- .../reference/exponentialmovingaverage.md | 60 ++++++++++--------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index a789a149d1e..26507bedb31 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -158,44 +158,48 @@ SELECT FROM numbers_mt(10); +-- Calculate timeunit using intDiv SELECT value, time, - exponentialMovingAverage(1)(value, intDiv(toUInt32(time),3600)) OVER (ORDER BY time ASC) res + exponentialMovingAverage(1)(value, intDiv(toUInt32(time), 3600)) OVER (ORDER BY time ASC) AS res, + intDiv(toUInt32(time), 3600) AS timeunit FROM data -ORDER BY time; +ORDER BY time ASC; 
-┌─value─┬────────────────time─┬─────────res─┐ -│ 10 │ 2020-01-01 00:00:00 │ 5 │ -│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ -│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ -│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ -│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ -│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ -│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ -│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ -│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ -│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ -└───────┴─────────────────────┴─────────────┘ +┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐ +│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │ +│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │ +│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │ +│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │ +│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │ +│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │ +│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │ +│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │ +│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │ +│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │ +└───────┴─────────────────────┴─────────────┴──────────┘ +-- Calculate timeunit timeunit using toRelativeHourNum SELECT value, time, - round(exponentialMovingAverage(2)(value, toRelativeHourNum(time)) OVER (ORDER BY time ASC), 2) AS res + exponentialMovingAverage(1)(value, toRelativeHourNum(time)) OVER (ORDER BY time ASC) AS res, + toRelativeHourNum(time) AS timeunit FROM data -ORDER BY time ASC +ORDER BY time ASC; -┌─value─┬────────────────time─┬──res─┐ -│ 10 │ 2020-01-01 00:00:00 │ 2.93 │ -│ 10 │ 2020-01-01 01:00:00 │ 5 │ -│ 10 │ 2020-01-01 02:00:00 │ 6.46 │ -│ 10 │ 2020-01-01 03:00:00 │ 7.5 │ -│ 10 │ 2020-01-01 04:00:00 │ 8.23 │ -│ 10 │ 2020-01-01 05:00:00 │ 8.75 │ -│ 10 │ 2020-01-01 06:00:00 │ 9.12 │ -│ 10 │ 2020-01-01 07:00:00 │ 9.37 │ -│ 10 │ 2020-01-01 08:00:00 │ 9.56 │ -│ 10 │ 2020-01-01 09:00:00 │ 9.69 │ -└───────┴─────────────────────┴──────┘ +┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐ +│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │ +│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │ +│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │ +│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │ +│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │ +│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │ +│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │ +│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │ +│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │ +│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │ +└───────┴─────────────────────┴─────────────┴──────────┘ ``` From 0b46de317b8b6217b9e920f494509b656f102c21 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Mon, 6 Mar 2023 16:33:28 -0400 Subject: [PATCH 252/333] Update ip-address-functions.md --- docs/en/sql-reference/functions/ip-address-functions.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 3843ca0fc36..cbb93ee15d7 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -286,6 +286,14 @@ Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns 0. Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null. +## toIPv6OrDefault(string) + +Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns 0. + +## toIPv6OrNull(string) + +Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null. 
+ ## toIPv6 Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. From d8029b7b2d092c218668826abcfdfc1f3c2f6061 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Mon, 6 Mar 2023 16:36:04 -0400 Subject: [PATCH 253/333] Update ip-address-functions.md --- docs/en/sql-reference/functions/ip-address-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index cbb93ee15d7..47058a28d12 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -280,7 +280,7 @@ SELECT ## toIPv4OrDefault(string) -Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns 0. +Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns `0.0.0.0` (0 IPv4). ## toIPv4OrNull(string) @@ -288,7 +288,7 @@ Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null ## toIPv6OrDefault(string) -Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns 0. +Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns `::` (0 IPv6). ## toIPv6OrNull(string) From 93ba0f4bdf45384bcbcf1296226275fe4a5ea7e6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 7 Mar 2023 01:30:44 +0300 Subject: [PATCH 254/333] Update StackTrace.cpp --- src/Common/StackTrace.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 0aebaf9801c..3c0bcd2f808 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -291,7 +291,7 @@ void StackTrace::tryCapture() #endif } -// Clickhouse uses bundled libc++ so type names will be same on every system thus it's save to hardcode them +/// ClickHouse uses bundled libc++ so type names will be the same on every system thus it's safe to hardcode them constexpr std::pair replacements[] = {{"::__1", ""}, {"std::basic_string, std::allocator>", "String"}}; From 990af0fb7977ab41b291df5c563c062ab26557e6 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Mon, 6 Mar 2023 23:44:41 +0100 Subject: [PATCH 255/333] Fix typo --- src/Databases/DatabaseAtomic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 34c4fd3d5d8..eaeeb793b23 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -273,7 +273,7 @@ void DatabaseAtomic::renameTable(ContextPtr local_context, const String & table_ else renameNoReplace(old_metadata_path, new_metadata_path); - /// After metadata was successfully moved, the following methods should not throw (if them do, it's a logical error) + /// After metadata was successfully moved, the following methods should not throw (if they do, it's a logical error) table_data_path = detach(*this, table_name, table->storesDataOnDisk()); if (exchange) other_table_data_path = detach(other_db, to_table_name, other_table->storesDataOnDisk()); From 1e11348a03113a2096b1103dc0dcd651a960f5bd Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Mon, 6 Mar 2023 20:42:17 -0400 Subject: [PATCH 256/333] Update exponentialmovingaverage.md --- .../aggregate-functions/reference/exponentialmovingaverage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index 26507bedb31..296aae41daa 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -19,7 +19,7 @@ Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the **Arguments** - `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not timestamp (seconds), it's -- an index of the time interval. Can be calculated using [intDiv](../../unctions/arithmetic-functions/#intdiva-b). +- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not timestamp (seconds), it's -- an index of the time interval. Can be calculated using [intDiv](../../functions/arithmetic-functions/#intdiva-b). **Parameters** From aacd05e34bcc6abd44eb67613b68551b18fd4d99 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Mon, 6 Mar 2023 21:24:04 -0500 Subject: [PATCH 257/333] bugfix + review suggestion --- .../self-extracting-executable/decompressor.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp index 8db0d95e70d..1374c269dc1 100644 --- a/utils/self-extracting-executable/decompressor.cpp +++ b/utils/self-extracting-executable/decompressor.cpp @@ -170,7 +170,7 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n bool isSudo() { - return getuid() == 0 && geteuid() == 0 && getenv("SUDO_USER") && getenv("SUDO_UID") && getenv("SUDO_GID"); // NOLINT(concurrency-mt-unsafe) + return geteuid() == 0 && getenv("SUDO_USER") && getenv("SUDO_UID") && getenv("SUDO_GID"); // NOLINT(concurrency-mt-unsafe) } /// Read data about files and decomrpess them. 
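The `isSudo()` hunk above relaxes the detection to the effective uid plus the `SUDO_*` environment variables, dropping the additional `getuid() == 0` requirement. A standalone sketch of the resulting check (illustrative code, not the actual decompressor):

```cpp
// Standalone sketch of the relaxed condition (illustrative, not the actual decompressor):
// only the effective uid and the SUDO_* environment variables are consulted;
// the real uid is no longer required to be 0 as well.
#include <cstdlib>
#include <iostream>
#include <unistd.h>

static bool looksLikeSudo()
{
    return geteuid() == 0 && std::getenv("SUDO_USER") && std::getenv("SUDO_UID") && std::getenv("SUDO_GID");
}

int main()
{
    std::cout << (looksLikeSudo() ? "running under sudo" : "not running under sudo") << '\n';
    return 0;
}
```

With `sudo`, the `SUDO_USER`, `SUDO_UID` and `SUDO_GID` variables are set by sudo itself, while `geteuid()` reports the effective uid, so this combination identifies the common sudo invocation.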
@@ -423,6 +423,13 @@ int main(int/* argc*/, char* argv[]) else name = file_path; + struct stat input_info; + if (0 != stat(self, &input_info)) + { + perror("stat"); + return 1; + } + #if !defined(OS_DARWIN) && !defined(OS_FREEBSD) /// get inode of this executable uint64_t inode = getInode(self); @@ -450,13 +457,6 @@ int main(int/* argc*/, char* argv[]) return 1; } - struct stat input_info; - if (0 != stat(self, &input_info)) - { - perror("stat"); - return 1; - } - /// inconsistency in WSL1 Ubuntu - inode reported in /proc/self/maps is a 64bit to /// 32bit conversion of input_info.st_ino if (input_info.st_ino & 0xFFFFFFFF00000000 && !(inode & 0xFFFFFFFF00000000)) From 951d65f2fdea0ff7027f71eecd4b5bb821c76403 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 7 Mar 2023 08:36:11 +0100 Subject: [PATCH 258/333] Fix debug build --- src/Client/TestHint.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/TestHint.cpp b/src/Client/TestHint.cpp index c9e845f2039..1500a8baf26 100644 --- a/src/Client/TestHint.cpp +++ b/src/Client/TestHint.cpp @@ -95,7 +95,7 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) } else if (token.type == TokenType::BareWord) { - int code = code = DB::ErrorCodes::getErrorCodeByName(std::string_view(token.begin, token.end)); + int code = DB::ErrorCodes::getErrorCodeByName(std::string_view(token.begin, token.end)); error_codes.push_back(code); } else From 946dff2d04e7f7af8d3541ed7de11f195b521b6c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 7 Mar 2023 10:51:06 +0100 Subject: [PATCH 259/333] Fix test 02566_ipv4_ipv6_binary_formats --- .../0_stateless/02566_ipv4_ipv6_binary_formats.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference index e228d911715..a3d8a33f757 100644 --- a/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference +++ b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference @@ -6,7 +6,7 @@ Arrow 2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 Parquet ipv6 Nullable(FixedString(16)) -ipv4 Nullable(Int64) +ipv4 Nullable(UInt32) 2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 ORC ipv6 Nullable(String) From 0d778e693bbeb90d7a8d15c9724684e1b3766913 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 7 Mar 2023 11:31:43 +0100 Subject: [PATCH 260/333] Update docs/en/sql-reference/statements/create/view.md --- docs/en/sql-reference/statements/create/view.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 6e412c13291..a9e7e34dcbb 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -67,7 +67,7 @@ Materialized views in ClickHouse use **column names** instead of column order du Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view. -Materialized views in ClickHouse does not have deterministic behaviour in case of errors. 
This means that blocks that had been already written will be preserved in the destination table, but all blocks after error will not. +Materialized views in ClickHouse do not have deterministic behaviour in case of errors. This means that blocks that had been already written will be preserved in the destination table, but all blocks after error will not. By default if pushing to one of views fails, then the INSERT query will fail too, and some blocks may not be written to the destination table. This can be changed using `materialized_views_ignore_errors` setting (you should set it for `INSERT` query), if you will set `materialized_views_ignore_errors=true`, then any errors while pushing to views will be ignored and all blocks will be written to the destination table. From 785128cb55712b342f5ff2d5c9324f71870cad10 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Tue, 7 Mar 2023 11:44:51 +0100 Subject: [PATCH 261/333] Fix index_granularity for test --- tests/queries/0_stateless/00636_partition_key_parts_pruning.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh b/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh index fdaecd87f53..7ec4d99f028 100755 --- a/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh +++ b/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query="SELECT '*** Single column partition key ***'" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS single_col_partition_key" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE single_col_partition_key(x UInt32) ENGINE MergeTree ORDER BY x PARTITION BY intDiv(x, 10)" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE single_col_partition_key(x UInt32) ENGINE MergeTree ORDER BY x PARTITION BY intDiv(x, 10) SETTINGS index_granularity=4" ${CLICKHOUSE_CLIENT} --query="INSERT INTO single_col_partition_key VALUES (1), (2), (3), (4), (11), (12), (20)" From 20925406ba0b1d51b338dd386e28a2fef5633573 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Tue, 7 Mar 2023 11:48:28 +0100 Subject: [PATCH 262/333] Docs for `BackupsIOThreadPool` 1/2 --- docs/en/development/architecture.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index 50b338844df..5c867c2c6b9 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -172,7 +172,7 @@ Global thread pool is `GlobalThreadPool` singleton class. To allocate thread fro Global pool is universal and all pools described below are implemented on top of it. This can be thought of as a hierarchy of pools. Any specialized pool takes its threads from the global pool using `ThreadPool` class. So the main purpose of any specialized pool is to apply limit on the number of simultaneous jobs and do job scheduling. If there are more jobs scheduled than threads in a pool, `ThreadPool` accumulates jobs in a queue with priorities. Each job has an integer priority. Default priority is zero. All jobs with higher priority values are started before any job with lower priority value. But there is no difference between already executing jobs, thus priority matters only when the pool in overloaded. -IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. 
It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. +IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. Backup to S3 does significant amount of IO operations and to avoid impact on interactive queries there is separate `BackupsIOThreadPool` configured with `max_backups_io_thread_pool_size`, `max_backups_io_thread_pool_free_size` and `backups_io_thread_pool_queue_size` settings. For periodic task execution there is `BackgroundSchedulePool` class. You can register tasks using `BackgroundSchedulePool::TaskHolder` objects and the pool ensures that no task runs two jobs at the same time. It also allows you to postpone task execution to a specific instant in the future or temporarily deactivate task. Global `Context` provides a few instances of this class for different purposes. For general purpose tasks `Context::getSchedulePool()` is used. From ace08ad6b26d8f1d5a48667bca270056549e95c4 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Tue, 7 Mar 2023 11:49:38 +0100 Subject: [PATCH 263/333] Update docs/en/development/architecture.md --- docs/en/development/architecture.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index 5c867c2c6b9..dd46b294ddd 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -172,7 +172,7 @@ Global thread pool is `GlobalThreadPool` singleton class. To allocate thread fro Global pool is universal and all pools described below are implemented on top of it. This can be thought of as a hierarchy of pools. Any specialized pool takes its threads from the global pool using `ThreadPool` class. So the main purpose of any specialized pool is to apply limit on the number of simultaneous jobs and do job scheduling. If there are more jobs scheduled than threads in a pool, `ThreadPool` accumulates jobs in a queue with priorities. Each job has an integer priority. Default priority is zero. All jobs with higher priority values are started before any job with lower priority value. But there is no difference between already executing jobs, thus priority matters only when the pool in overloaded. -IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. Backup to S3 does significant amount of IO operations and to avoid impact on interactive queries there is separate `BackupsIOThreadPool` configured with `max_backups_io_thread_pool_size`, `max_backups_io_thread_pool_free_size` and `backups_io_thread_pool_queue_size` settings. +IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. 
It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. Backup to S3 does significant amount of IO operations and to avoid impact on interactive queries there is a separate `BackupsIOThreadPool` configured with `max_backups_io_thread_pool_size`, `max_backups_io_thread_pool_free_size` and `backups_io_thread_pool_queue_size` settings. For periodic task execution there is `BackgroundSchedulePool` class. You can register tasks using `BackgroundSchedulePool::TaskHolder` objects and the pool ensures that no task runs two jobs at the same time. It also allows you to postpone task execution to a specific instant in the future or temporarily deactivate task. Global `Context` provides a few instances of this class for different purposes. For general purpose tasks `Context::getSchedulePool()` is used. From 2ae0bceb2b427e0e1a1d8ae3e6d30107448fad3e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 7 Mar 2023 10:56:11 +0000 Subject: [PATCH 264/333] Address PR comments --- docs/en/operations/settings/settings.md | 53 +++++++-------------- docs/en/sql-reference/operators/in.md | 6 +-- src/Interpreters/Cluster.cpp | 26 ++++++---- src/Interpreters/InterpreterSelectQuery.cpp | 12 ++--- 4 files changed, 43 insertions(+), 54 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 809fdb17d99..94dcf159ca9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1248,8 +1248,8 @@ Possible values: Default value: 1. :::warning -Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) with [parallel_replicas_mode](#settings-parallel_replicas_mode) set to `sample_key` or `read_tasks`. -If [parallel_replicas_mode](#settings-parallel_replicas_mode) is set to `custom_key`, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas. +Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key). +If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas. If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects. ::: @@ -1275,11 +1275,14 @@ Default value: `1`. **Additional Info** -This options will produce different results depending on the value of [parallel_replicas_mode](#settings-parallel_replicas_mode). +This options will produce different results depending on the settings used. -### `sample_key` +:::warning +This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details. +::: + +### Parallel processing using `SAMPLE` key -If [parallel_replicas_mode](#settings-parallel_replicas_mode) is set to `sample_key`, this setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. 
But the query performance may degrade in the following cases: - The position of the sampling key in the partitioning key does not allow efficient range scans. @@ -1287,45 +1290,21 @@ A query may be processed faster if it is executed on several servers in parallel - The sampling key is an expression that is expensive to calculate. - The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency. -### `custom_key` +### Parallel processing using [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) -If [parallel_replicas_mode](#settings-parallel_replicas_mode) is set to `custom_key`, this setting is useful for any replicated table. -A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) -and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type). - -Use `default` for [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type) unless the data is split across the entire integer space (e.g. column contains hash values), -then `range` should be used. -Simple expressions using primary keys are preferred. - -If the `custom_key` mode is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards. -Otherwise, it will behave same as `sample_key` mode, it will use multiple replicas of each shard. - -:::warning -This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details. -::: - -## parallel_replicas_mode {#settings-parallel_replicas_mode} - -Mode of splitting work between replicas. - -Possible values: - -- `sample_key` — Use `SAMPLE` key defined in the `SAMPLE BY` clause to split the work between replicas. -- `custom_key` — Define an arbitrary integer expression to use for splitting work between replicas. -- `read_tasks` — Split tasks for reading physical parts between replicas. - -Default value: `sample_key`. +This setting is useful for any replicated table. ## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key} -Map of arbitrary integer expression that can be used to split work between replicas for a specific table. -If it's used with `cluster` function, the key can be name of the local table defined inside the `cluster` function. -If it's used with `Distributed` engine, the key can be name of the distributed table, alias or the local table for which the `Distributed` engine is created. +An arbitrary integer expression that can be used to split work between replicas for a specific table. The value can be any integer expression. +A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) +and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type). -Used only if `parallel_replicas_mode` is set to `custom_key`. +Simple expressions using primary keys are preferred. -Default value: `{}`. +If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards. 
+Otherwise, it will behave same as for `SAMPLE` key, it will use multiple replicas of each shard. ## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type} diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index e1e4118524a..0599a50c0a4 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -235,10 +235,10 @@ If `some_predicate` is not selective enough, it will return large amount of data When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed. -For example, if [parallel_replicas_mode](#settings-parallel_replicas_mode) is set to `sample_key`, the following: +For example, the following: ```sql SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100) -SETTINGS max_parallel_replicas=3, parallel_replicas_mode='sample_key' +SETTINGS max_parallel_replicas=3 ``` is transformed on each server into @@ -256,4 +256,4 @@ Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) se One workaround if local_table_2 does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`. -If a table doesn't have a sampling key, more flexible options for [parallel_replicas_mode](#settings-parallel_replicas_mode) can be used that can produce different and more optimal behaviour. +If a table doesn't have a sampling key, more flexible options for [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) can be used that can produce different and more optimal behaviour. diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index b419dacd523..0add0e427f9 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -671,7 +671,7 @@ std::unique_ptr Cluster::getClusterWithMultipleShards(const std::vector namespace { -void shuffleReplicas(auto & replicas, const Settings & settings) +void shuffleReplicas(std::vector & replicas, const Settings & settings, size_t replicas_needed) { std::random_device rd; std::mt19937 gen{rd()}; @@ -679,15 +679,25 @@ void shuffleReplicas(auto & replicas, const Settings & settings) if (settings.prefer_localhost_replica) { // force for local replica to always be included - auto local_replica = std::find_if(replicas.begin(), replicas.end(), [](const auto & replica) { return replica.is_local; }); - if (local_replica != replicas.end()) - { - if (local_replica != replicas.begin()) - std::swap(*replicas.begin(), *local_replica); + auto first_non_local_replica = std::partition(replicas.begin(), replicas.end(), [](const auto & replica) { return replica.is_local; }); + size_t local_replicas_count = first_non_local_replica - replicas.begin(); - std::shuffle(replicas.begin() + 1, replicas.end(), gen); + if (local_replicas_count == replicas_needed) + { + /// we have exact amount of local replicas as needed, no need to do anything return; } + + if (local_replicas_count > replicas_needed) + { + /// we can use only local replicas, shuffle them + std::shuffle(replicas.begin(), first_non_local_replica, gen); + return; + } + + /// shuffle just non local replicas + std::shuffle(first_non_local_replica, replicas.end(), gen); + return; } std::shuffle(replicas.begin(), replicas.end(), gen); @@ -751,7 +761,7 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti { auto shuffled_replicas = replicas; // shuffle replicas so we don't always pick the same subset 
- shuffleReplicas(shuffled_replicas, settings); + shuffleReplicas(shuffled_replicas, settings, max_replicas_from_shard); create_shards_from_replicas(std::span{shuffled_replicas.begin(), max_replicas_from_shard}); } } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d016922e89e..33d362556aa 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -523,7 +523,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( { if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *context)) { - LOG_INFO(log, "Processing query on a replica using custom_key"); + LOG_TRACE(log, "Processing query on a replica using custom_key '{}'", settings.parallel_replicas_custom_key.value); if (!storage) throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Storage is unknown when trying to parse custom key for parallel replica"); @@ -537,11 +537,11 @@ InterpreterSelectQuery::InterpreterSelectQuery( } else if (settings.parallel_replica_offset > 0) { - LOG_DEBUG( - log, - "Will use no data on this replica because parallel replicas processing with custom_key has been requested" - " (setting 'max_parallel_replicas') but the table does not have custom_key defined for it or it's invalid (settings `parallel_replicas_custom_key`)"); - parallel_replicas_custom_filter_ast = std::make_shared(false); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Parallel replicas processing with custom_key has been requested " + "(setting 'max_parallel_replicas') but the table does not have custom_key defined for it " + "or it's invalid (settings `parallel_replicas_custom_key`)"); } } else if (auto * distributed = dynamic_cast(storage.get()); From 3dcdf4cd4ef72b3a8043e7533c1119efbbbfa090 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Tue, 7 Mar 2023 12:12:29 +0100 Subject: [PATCH 265/333] Docs for `BackupsIOThreadPool` 2/2 --- .../settings.md | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 17d03dfa4ec..92a8270ac0f 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -967,6 +967,7 @@ The maximum number of jobs that can be scheduled on the Global Thread pool. Incr Possible values: - Positive integer. +- 0 — No limit. Default value: `10000`. @@ -976,6 +977,69 @@ Default value: `10000`. 12000 ``` +## max_io_thread_pool_size {#max-io-thread-pool-size} + +ClickHouse uses threads from the IO Thread pool to do some IO operations (e.g. to interact with S3). `max_io_thread_pool_size` limits the maximum number of threads in the pool. + +Possible values: + +- Positive integer. + +Default value: `100`. + +## max_io_thread_pool_free_size {#max-io-thread-pool-free-size} + +If the number of **idle** threads in the IO Thread pool is greater than `max_io_thread_pool_free_size`, then ClickHouse releases resources occupied by some threads and the pool size is decreased. Threads can be created again if necessary. + +Possible values: + +- Positive integer. + +Default value: `0`. + +## io_thread_pool_queue_size {#io-thread-pool-queue-size} + +The maximum number of jobs that can be scheduled on the IO Thread pool. + +Possible values: + +- Positive integer. +- 0 — No limit. + +Default value: `10000`. 
+ +## max_backups_io_thread_pool_size {#max-backups-io-thread-pool-size} + +ClickHouse uses threads from the Backups IO Thread pool to do S3 backup IO operations. `max_backups_io_thread_pool_size` limits the maximum number of threads in the pool. + +Possible values: + +- Positive integer. + +Default value: `1000`. + +## max_backups_io_thread_pool_free_size {#max-backups-io-thread-pool-free-size} + +If the number of **idle** threads in the Backups IO Thread pool is greater than `max_backups_io_thread_pool_free_size`, then ClickHouse releases resources occupied by some threads and the pool size is decreased. Threads can be created again if necessary. + +Possible values: + +- Positive integer. +- Zero. + +Default value: `0`. + +## backups_io_thread_pool_queue_size {#backups-io-thread-pool-queue-size} + +The maximum number of jobs that can be scheduled on the Backups IO Thread pool. It is recommended to keep this queue unlimited due to the current S3 backup logic. + +Possible values: + +- Positive integer. +- 0 — No limit. + +Default value: `0`. + ## background_pool_size {#background_pool_size} Sets the number of threads performing background merges and mutations for tables with MergeTree engines. This setting can also be applied at server startup from the `default` profile configuration for backward compatibility at ClickHouse server start. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance more slowly, which might eventually impact query performance.
From f408f750ebc9a4f09224de2e027be2b0f84e2e0a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 7 Mar 2023 11:24:46 +0000 Subject: [PATCH 266/333] Disable parallel replicas for joins --- src/Interpreters/InterpreterSelectQuery.cpp | 3 ++- .../02535_max_parallel_replicas_custom_key.reference | 1 + .../0_stateless/02535_max_parallel_replicas_custom_key.sh | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 33d362556aa..2f579244b9a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -455,10 +455,11 @@ InterpreterSelectQuery::InterpreterSelectQuery( } } - if (joined_tables.tablesCount() > 1 && settings.allow_experimental_parallel_reading_from_replicas) + if (joined_tables.tablesCount() > 1 && (!settings.parallel_replicas_custom_key.value.empty() || settings.allow_experimental_parallel_reading_from_replicas)) { LOG_WARNING(log, "Joins are not supported with parallel replicas.
Query will be executed without using them."); context->setSetting("allow_experimental_parallel_reading_from_replicas", false); + context->setSetting("parallel_replicas_custom_key", String{""}); } /// Rewrite JOINs diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference index 9e4e24235aa..8d0f56ba185 100644 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference @@ -170,3 +170,4 @@ filter_type='range' max_replicas=3 prefer_localhost_replica=1 0 334 1 333 2 333 +1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh index 314251be874..3035a191c8f 100755 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh @@ -41,4 +41,6 @@ run_count_with_custom_key "y" run_count_with_custom_key "cityHash64(y)" run_count_with_custom_key "cityHash64(y) + 1" +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) as t1 JOIN 02535_custom_key USING y" --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "Joins are not supported with parallel replicas" + $CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" From 1db6b9414e8b71f27bd1040f696d780f020ca1e0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 7 Mar 2023 12:39:17 +0100 Subject: [PATCH 267/333] Update src/Storages/StorageReplicatedMergeTree.cpp Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 9e79a715610..61eedc18736 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8436,7 +8436,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( { /// We don't know what to do, because this part can be mutation part /// with hardlinked columns. Since we don't have this information (about blobs not to remove) - /// we refuce to remove blobs. + /// we refuse to remove blobs. LOG_WARNING(logger, "Node with parent zookeeper lock {} for part {} doesn't exist (part was unlocked before), refuse to remove blobs", zookeeper_part_uniq_node, part_name); return std::make_pair{false, {}}; } From c10cb436f41979e92679752d74dc5129637f932c Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 7 Mar 2023 12:42:42 +0100 Subject: [PATCH 268/333] Fix build --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 61eedc18736..6c6ff30fd04 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8438,7 +8438,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( /// with hardlinked columns. Since we don't have this information (about blobs not to remove) /// we refuse to remove blobs. 
LOG_WARNING(logger, "Node with parent zookeeper lock {} for part {} doesn't exist (part was unlocked before), refuse to remove blobs", zookeeper_part_uniq_node, part_name); - return std::make_pair{false, {}}; + return {false, {}}; } else { From ced97cefbb5dcbdb74782543281c71ccba387efc Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 7 Mar 2023 09:36:41 +0000 Subject: [PATCH 269/333] Address review comments --- .../Passes/LogicalExpressionOptimizer.cpp | 21 +++++++++++++++---- .../Passes/LogicalExpressionOptimizerPass.h | 12 +++++------ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp index 85b44de74da..73585a4cd23 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp @@ -102,7 +102,12 @@ private: if (and_operands.size() == 1) { - assert(!function_node.getResultType()->isNullable()); + /// AND operator can have UInt8 or bool as its type. + /// bool is used if a bool constant is at least one operand. + /// Because we reduce the number of operands here by eliminating the same equality checks, + /// the only situation we can end up here is we had AND check where all the equality checks are the same so we know the type is UInt8. + /// Otherwise, we will have > 1 operands and we don't have to do anything. + assert(!function_node.getResultType()->isNullable() && and_operands[0]->getResultType()->equals(*function_node.getResultType())); node = std::move(and_operands[0]); return; } @@ -204,9 +209,17 @@ private: if (or_operands.size() == 1) { - assert(!function_node.getResultType()->isNullable()); - node = std::move(or_operands[0]); - return; + /// if the result type of operand is the same as the result type of OR + /// we can replace OR with the operand + if (or_operands[0]->getResultType()->equals(*function_node.getResultType())) + { + assert(!function_node.getResultType()->isNullable()); + node = std::move(or_operands[0]); + return; + } + + /// otherwise add a stub 0 to make OR correct + or_operands.push_back(std::make_shared(static_cast(0))); } auto or_function_resolver = FunctionFactory::instance().get("or", getContext()); diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h index b436c94fe4c..05c10ddc685 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h @@ -6,9 +6,9 @@ namespace DB { /** - * This pass tries to do optimizations on logical expression + * This pass tries to do optimizations on logical expression: * - * Replaces chains of equality functions inside an OR with a single IN operator. + * 1. Replaces chains of equality functions inside an OR with a single IN operator. * The replacement is done if: * - one of the operands of the equality function is a constant * - length of chain is at least 'optimize_min_equality_disjunction_chain_length' long OR the expression has type of LowCardinality @@ -26,7 +26,7 @@ namespace DB * WHERE b = 'test' OR a IN (1, 2); * ------------------------------- * - * Removes duplicate OR checks + * 2. Removes duplicate OR checks * ------------------------------- * SELECT * * FROM table @@ -39,7 +39,7 @@ namespace DB * WHERE a = 1 OR b = 'test'; * ------------------------------- * - * Replaces AND chains with a single constant. + * 3. Replaces AND chains with a single constant. 
* The replacement is done if: * - one of the operands of the equality function is a constant * - constants are different for same expression @@ -55,7 +55,7 @@ namespace DB * WHERE 0; * ------------------------------- * - * Removes duplicate AND checks + * 4. Removes duplicate AND checks * ------------------------------- * SELECT * * FROM table @@ -74,7 +74,7 @@ class LogicalExpressionOptimizerPass final : public IQueryTreePass public: String getName() override { return "LogicalExpressionOptimizer"; } - String getDescription() override { return "Transform all the 'or's with equality check to a single IN function"; } + String getDescription() override { return "Transform equality chain to a single IN function or a constant if possible"; } void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; }; From 37b09ce46addcfabdfae531c93e1108a74ed0746 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 7 Mar 2023 12:16:01 +0000 Subject: [PATCH 270/333] add back trace log --- tests/config/config.d/keeper_port.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index cffd325e968..6c60c8936a9 100644 --- a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -14,6 +14,7 @@ 1000000000000000 100000 + trace 0 From 7ce20f5cd2f702df04b2ccbc58117cee9cd974fb Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 7 Mar 2023 00:53:37 +0100 Subject: [PATCH 271/333] fix tests --- src/Interpreters/InterpreterKillQueryQuery.cpp | 4 ++++ .../Executors/PushingAsyncPipelineExecutor.cpp | 2 +- tests/integration/test_grpc_protocol/test.py | 2 -- .../02232_dist_insert_send_logs_level_hung.sh | 11 ++++++++++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 40698386ccb..3330159aff5 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -161,6 +161,8 @@ public: if (curr_process.processed) continue; + LOG_DEBUG(&Poco::Logger::get("KillQuery"), "Will kill query {} (synchronously)", curr_process.query_id); + auto code = process_list.sendCancelToQuery(curr_process.query_id, curr_process.user, true); if (code != CancellationCode::QueryIsNotInitializedYet && code != CancellationCode::CancelSent) @@ -226,6 +228,8 @@ BlockIO InterpreterKillQueryQuery::execute() MutableColumns res_columns = header.cloneEmptyColumns(); for (const auto & query_desc : queries_to_stop) { + if (!query.test) + LOG_DEBUG(&Poco::Logger::get("KillQuery"), "Will kill query {} (asynchronously)", query_desc.query_id); auto code = (query.test) ? 
CancellationCode::Unknown : process_list.sendCancelToQuery(query_desc.query_id, query_desc.user, true); insertResultRow(query_desc.source_num, code, processes_block, header, res_columns); }
diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 70815bb8b3b..4478f1548a4 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -187,7 +187,7 @@ void PushingAsyncPipelineExecutor::push(Chunk chunk) if (!is_pushed) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); + "Pipeline for PushingAsyncPipelineExecutor was finished before all data was inserted"); } void PushingAsyncPipelineExecutor::push(Block block)
diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index a1bc0d42a46..137d585f7d1 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -594,8 +594,6 @@ def test_cancel_while_processing_input(): stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) result = stub.ExecuteQueryWithStreamInput(send_query_info()) assert result.cancelled == True - assert result.progress.written_rows == 6 - assert query("SELECT a FROM t ORDER BY a") == "1\n2\n3\n4\n5\n6\n" def test_cancel_while_generating_output():
diff --git a/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh b/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh index 5ed94148bc1..734cef06214 100755 --- a/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh +++ b/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh @@ -49,7 +49,16 @@ insert_client_opts=( timeout 250s $CLICKHOUSE_CLIENT "${client_opts[@]}" "${insert_client_opts[@]}" -q "insert into function remote('127.2', currentDatabase(), in_02232) select * from numbers(1e6)" # Kill underlying query of remote() to make KILL faster -timeout 60s $CLICKHOUSE_CLIENT "${client_opts[@]}" -q "KILL QUERY WHERE Settings['log_comment'] = '$CLICKHOUSE_LOG_COMMENT' SYNC" --format Null +# This test reproduces very interesting behaviour. +# The block size is 1, so the secondary query creates InterpreterSelectQuery for each row due to pushing to the MV. +# It works extremely slowly, and the initial query produces new blocks and writes them to the socket much faster +# than the secondary query can read and process them. Therefore, it fills network buffers in the kernel. +# Once a buffer in the kernel is full, send(...) blocks until the secondary query finishes processing the data +# that it already has in ReadBufferFromPocoSocket and calls recv. +# Or until the kernel decides to resize the buffer (it seems to have non-trivial rules for that). +# Anyway, it may look like the initial query got stuck, but actually it did not. +# Moreover, the initial query cannot be killed at that point, so KILL QUERY ... SYNC will get "stuck" as well. +timeout 30s $CLICKHOUSE_CLIENT "${client_opts[@]}" -q "KILL QUERY WHERE query like '%INSERT INTO $CLICKHOUSE_DATABASE.in_02232%' SYNC" --format Null echo $?
$CLICKHOUSE_CLIENT "${client_opts[@]}" -nm -q " From 0da310c8dcf51331bc435f7cf49c69e84145286e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 7 Mar 2023 15:30:40 +0100 Subject: [PATCH 272/333] Update ProcessList.cpp --- src/Interpreters/ProcessList.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index a26844ae73c..b792ea538ae 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -404,7 +404,7 @@ CancellationCode QueryStatus::cancelQuery(bool) std::lock_guard lock(executors_mutex); executors_snapshot = executors; } - + /// We should call cancel() for each executor with unlocked executors_mutex, because /// cancel() can try to lock some internal mutex that is already locked by query executing /// thread, and query executing thread can call removePipelineExecutor and lock executors_mutex, From 27e4b09f944ae450e394b2886dd557ac64ae141b Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 7 Mar 2023 15:13:13 +0000 Subject: [PATCH 273/333] Fix 02570_fallback_from_async_insert --- tests/queries/0_stateless/02570_fallback_from_async_insert.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02570_fallback_from_async_insert.sh b/tests/queries/0_stateless/02570_fallback_from_async_insert.sh index 9c158d6241b..d7c8944b89d 100755 --- a/tests/queries/0_stateless/02570_fallback_from_async_insert.sh +++ b/tests/queries/0_stateless/02570_fallback_from_async_insert.sh @@ -47,6 +47,7 @@ $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT --query " SELECT 'id_' || splitByChar('_', query_id)[1] AS id FROM system.text_log WHERE query_id LIKE '%$query_id_suffix' AND message LIKE '%$message%' + ORDER BY id " $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t_async_insert_fallback" From d9611dde0b12d2c7c5b5c633f028d7c625ae4d78 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 7 Mar 2023 15:23:27 +0000 Subject: [PATCH 274/333] fix test --- .../01705_normalize_create_alter_function_names.reference | 4 ++-- .../01705_normalize_create_alter_function_names.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference index b6f5fe99ca1..b5b93c34c00 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference +++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.x\n(\n `i` Int32,\n INDEX mm rand() TYPE minmax GRANULARITY 1,\n INDEX nn rand() TYPE minmax GRANULARITY 1,\n PROJECTION p\n (\n SELECT max(i)\n ),\n PROJECTION p2\n (\n SELECT min(i)\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/x\', \'r\')\nORDER BY i\nSETTINGS index_granularity = 8192 -metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm rand() TYPE minmax GRANULARITY 1, nn rand() TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\n +CREATE TABLE default.x\n(\n `i` Int32,\n INDEX mm log2(i) TYPE minmax GRANULARITY 1,\n INDEX nn log2(i) TYPE minmax GRANULARITY 1,\n PROJECTION p\n (\n SELECT max(i)\n ),\n PROJECTION p2\n (\n SELECT min(i)\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/x\', 
\'r\')\nORDER BY i\nSETTINGS index_granularity = 8192 +metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm log2(i) TYPE minmax GRANULARITY 1, nn log2(i) TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\n diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql index 683bd271405..be0f7e8b710 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql +++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql @@ -2,9 +2,9 @@ drop table if exists x; -create table x(i int, index mm RAND() type minmax granularity 1, projection p (select MAX(i))) engine ReplicatedMergeTree('/clickhouse/tables/{database}/x', 'r') order by i; +create table x(i int, index mm LOG2(i) type minmax granularity 1, projection p (select MAX(i))) engine ReplicatedMergeTree('/clickhouse/tables/{database}/x', 'r') order by i; -alter table x add index nn RAND() type minmax granularity 1, add projection p2 (select MIN(i)); +alter table x add index nn LOG2(i) type minmax granularity 1, add projection p2 (select MIN(i)); show create x; From af23c8bed07f665c0cf074b64bb10d7ad49cbb66 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 6 Mar 2023 17:46:46 +0100 Subject: [PATCH 275/333] Get the release logs w/o delay from curl --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2ef05fe989b..4b4c7e6c70e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,7 +15,7 @@ jobs: - name: Deploy packages and assets run: | GITHUB_TAG="${GITHUB_REF#refs/tags/}" - curl --silent --data '' \ + curl --silent --data '' --no-buffer \ '${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true' ############################################################################################ ##################################### Docker images ####################################### From c63be68a8fd9a18c881df656d56f17ea899280fc Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 7 Mar 2023 13:35:00 +0100 Subject: [PATCH 276/333] Use explicit docker tags and paths --- .github/workflows/backport_branches.yml | 3 ++- .github/workflows/master.yml | 3 ++- .github/workflows/pull_request.yml | 3 ++- .github/workflows/release.yml | 3 ++- .github/workflows/release_branches.yml | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index a324d20abc9..7cdf11fec0f 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -452,7 +452,8 @@ jobs: - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type head --no-push + python3 docker_server.py --release-type head --no-push \ + --image-repo clickhouse/clickhouse-server --image-path docker/server python3 docker_server.py --release-type head --no-push --no-ubuntu \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index b70fe256833..7c5e477ab60 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -860,7 +860,8 @@ jobs: - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type head + python3 docker_server.py --release-type head \ + --image-repo clickhouse/clickhouse-server --image-path docker/server python3 docker_server.py --release-type head --no-ubuntu \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index ff98739db00..e7303c7b69f 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -917,7 +917,8 @@ jobs: - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type head --no-push + python3 docker_server.py --release-type head --no-push \ + --image-repo clickhouse/clickhouse-server --image-path docker/server python3 docker_server.py --release-type head --no-push --no-ubuntu \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4b4c7e6c70e..e69d189c80e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -31,7 +31,8 @@ jobs: - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type auto --version "${{ github.ref }}" + python3 docker_server.py --release-type auto --version "${{ github.ref }}" \ + --image-repo clickhouse/clickhouse-server --image-path docker/server python3 docker_server.py --release-type auto --version "${{ github.ref }}" --no-ubuntu \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 74ec1163cc9..e56a1fb58fc 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -525,7 +525,8 @@ jobs: - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type head --no-push + python3 docker_server.py --release-type head --no-push \ + --image-repo 
clickhouse/clickhouse-server --image-path docker/server python3 docker_server.py --release-type head --no-push --no-ubuntu \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup From dedd242f350d9ddfb8a43eac36103f4841b57941 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 7 Mar 2023 13:42:18 +0100 Subject: [PATCH 277/333] Add a manual trigger for release workflow --- .github/workflows/release.yml | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e69d189c80e..73246af6dfc 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,14 +7,27 @@ on: # yamllint disable-line rule:truthy release: types: - published + workflow_dispatch: + inputs: + tag: + description: 'Release tag' + required: true + type: string jobs: ReleasePublish: runs-on: [self-hosted, style-checker] steps: + - name: Set tag from input + if: github.event_name == 'workflow_dispatch' + run: | + echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV" + - name: Set tag from REF + if: github.event_name == 'release' + run: | + echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - name: Deploy packages and assets run: | - GITHUB_TAG="${GITHUB_REF#refs/tags/}" curl --silent --data '' --no-buffer \ '${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true' ############################################################################################ @@ -23,17 +36,26 @@ jobs: DockerServerImages: runs-on: [self-hosted, style-checker] steps: + - name: Set tag from input + if: github.event_name == 'workflow_dispatch' + run: | + echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV" + - name: Set tag from REF + if: github.event_name == 'release' + run: | + echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - name: Check out repository code uses: ClickHouse/checkout@v1 with: clear-repository: true fetch-depth: 0 # otherwise we will have no version info + ref: ${{ env.GITHUB_TAG }} - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type auto --version "${{ github.ref }}" \ + python3 docker_server.py --release-type auto --version "$GITHUB_TAG" \ --image-repo clickhouse/clickhouse-server --image-path docker/server - python3 docker_server.py --release-type auto --version "${{ github.ref }}" --no-ubuntu \ + python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --no-ubuntu \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup if: always() From 86410a7a3f8a9241413f8a36d3bda04e003e17da Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 7 Mar 2023 16:16:51 +0000 Subject: [PATCH 278/333] Fix MergeTreeTransaction::isReadOnly --- src/Interpreters/MergeTreeTransaction.cpp | 7 +++++++ src/Interpreters/MergeTreeTransaction.h | 3 +++ 2 files changed, 10 insertions(+) diff --git a/src/Interpreters/MergeTreeTransaction.cpp b/src/Interpreters/MergeTreeTransaction.cpp index 50ecb061752..bfdda354c9b 100644 --- a/src/Interpreters/MergeTreeTransaction.cpp +++ b/src/Interpreters/MergeTreeTransaction.cpp @@ -168,6 +168,8 @@ void MergeTreeTransaction::addMutation(const StoragePtr & table, const String & bool MergeTreeTransaction::isReadOnly() const { std::lock_guard lock{mutex}; + if (finalized) + return is_read_only; chassert((creating_parts.empty() && 
removing_parts.empty() && mutations.empty()) == storages.empty()); return storages.empty(); } @@ -318,6 +320,11 @@ bool MergeTreeTransaction::rollback() noexcept void MergeTreeTransaction::afterFinalize() { std::lock_guard lock{mutex}; + chassert((creating_parts.empty() && removing_parts.empty() && mutations.empty()) == storages.empty()); + + /// Remember if it was read-only transaction before we clear storages + is_read_only = storages.empty(); + /// Release shared pointers just in case storages.clear(); mutations.clear(); diff --git a/src/Interpreters/MergeTreeTransaction.h b/src/Interpreters/MergeTreeTransaction.h index e5a80e03e18..4ca36cf64ad 100644 --- a/src/Interpreters/MergeTreeTransaction.h +++ b/src/Interpreters/MergeTreeTransaction.h @@ -78,6 +78,9 @@ private: bool finalized TSA_GUARDED_BY(mutex) = false; + /// Indicates if transaction was read-only before `afterFinalize` + bool is_read_only TSA_GUARDED_BY(mutex) = false; + /// Lists of changes made by transaction std::unordered_set storages TSA_GUARDED_BY(mutex); DataPartsVector creating_parts TSA_GUARDED_BY(mutex); From 4e79df448f151a471d049c6938c06e2e690e9904 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 7 Mar 2023 16:17:35 +0000 Subject: [PATCH 279/333] Catch exceptions in LiveViewPeriodicRefreshTask --- src/Storages/LiveView/StorageLiveView.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 547becf3837..3467628d081 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -241,7 +241,18 @@ StorageLiveView::StorageLiveView( blocks_metadata_ptr = std::make_shared(); active_ptr = std::make_shared(true); - periodic_refresh_task = getContext()->getSchedulePool().createTask("LiveViewPeriodicRefreshTask", [this]{ periodicRefreshTaskFunc(); }); + periodic_refresh_task = getContext()->getSchedulePool().createTask("LiveViewPeriodicRefreshTask", + [this] + { + try + { + periodicRefreshTaskFunc(); + } + catch (...) 
+ { + tryLogCurrentException(log, "Exception in LiveView periodic refresh task in BackgroundSchedulePool"); + } + }); periodic_refresh_task->deactivate(); } From ba0cc954415275380c93f51c145b6ed479388aa3 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 7 Mar 2023 20:04:52 +0100 Subject: [PATCH 280/333] Fix typo --- src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 7fae2c8f480..39cacde94ed 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -23,7 +23,7 @@ #include namespace DB -{7 +{ namespace ErrorCodes { From ff8fc0634ac6c27b74b7182e3307212b8b25f5bd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 7 Mar 2023 19:50:42 +0000 Subject: [PATCH 281/333] Docs: Small cleanups after Kafka fix #47138 --- docs/en/engines/table-engines/integrations/kafka.md | 12 ++++++------ docs/en/sql-reference/statements/create/table.md | 8 ++------ src/Storages/ColumnsDescription.h | 4 +--- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index e6134043b8e..255ba06f056 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -19,8 +19,8 @@ Kafka lets you: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + name1 [type1], + name2 [type2], ... ) ENGINE = Kafka() SETTINGS @@ -113,6 +113,10 @@ Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format +:::info +The Kafka table engine doesn't support columns with [default value](../../../sql-reference/statements/create/table.md#default_value). If you need columns with default value, you can add them at materialized view level (see below). +::: + ## Description {#description} The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name. @@ -125,10 +129,6 @@ Groups are flexible and synced on the cluster. For instance, if you have 10 topi 2. Create a table with the desired structure. 3. Create a materialized view that converts data from the engine and puts it into a previously created table. -:::info -Kafka Engine doesn't support columns with default value of type `DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS`. If you need columns with any default type, they can be added at `MATERIALIZED VIEW` level. -::: - When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from Kafka and convert them to the required format using `SELECT`. One kafka table can have as many materialized views as you like, they do not read data from the kafka table directly, but receive new records (in blocks), this way you can write to several tables with different detail level (with grouping - aggregation and without). 
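For illustration, a minimal sketch of the consumer / materialized view / target table pattern described above (broker address, topic, table and column names are hypothetical):

```sql
-- 1. The Kafka consumer table: note that it defines no DEFAULT/MATERIALIZED/ALIAS columns.
CREATE TABLE queue
(
    timestamp UInt64,
    level String,
    message String
) ENGINE = Kafka('localhost:9092', 'topic', 'group1', 'JSONEachRow');

-- 2. The target table with the desired structure; columns with defaults can live here.
CREATE TABLE daily
(
    day Date,
    level String,
    total UInt64
) ENGINE = SummingMergeTree()
ORDER BY (day, level);

-- 3. The materialized view that continuously converts Kafka messages and writes them to the target table.
CREATE MATERIALIZED VIEW consumer TO daily
AS SELECT toDate(toDateTime(timestamp)) AS day, level, count() AS total
FROM queue
GROUP BY day, level;
```

All names here are placeholders; the point is only that defaults and aggregation live in the materialized view and the target table, not in the Kafka table itself.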
diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index b3d5060e13c..50e74920e4b 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -110,7 +110,7 @@ If the type is not `Nullable` and if `NULL` is specified, it will be treated as See also [data_type_default_nullable](../../../operations/settings/settings.md#data_type_default_nullable) setting. -## Default Values +## Default Values {#default_values} The column description can specify an expression for a default value, in one of the following ways: `DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`. @@ -124,10 +124,6 @@ If the data type and default expression are defined explicitly, this expression Default expressions may be defined as an arbitrary expression from table constants and columns. When creating and changing the table structure, it checks that expressions do not contain loops. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed. -:::info -Kafka Engine doesn't support columns with default value of type `DEFAULT/MATERIALIZED/EPHEMERAL/ALIAS`. If you need columns with any default type, they can be added at `MATERIALIZED VIEW` level, see [Kafka Engine](../../../engines/table-engines/integrations/kafka.md#description). -::: - ### DEFAULT `DEFAULT expr` @@ -580,7 +576,7 @@ SELECT * FROM base.t1; You can add a comment to the table when you creating it. :::note -The comment is supported for all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). +The comment clause is supported by all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). ::: diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 36109392ab6..5551fdea2e3 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -132,9 +132,7 @@ public: NamesAndTypesList getInsertable() const; /// ordinary + ephemeral NamesAndTypesList getAliases() const; NamesAndTypesList getEphemeral() const; - // Columns with preset default expression. - // For example from `CREATE TABLE` statement - NamesAndTypesList getWithDefaultExpression() const; + NamesAndTypesList getWithDefaultExpression() const; // columns with default expression, for example set by `CREATE TABLE` statement NamesAndTypesList getAllPhysical() const; /// ordinary + materialized. NamesAndTypesList getAll() const; /// ordinary + materialized + aliases + ephemeral /// Returns .size0/.null/... From 9792b93a1171f8fc95479c616b236141c70af667 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 23 Feb 2023 13:48:36 +0100 Subject: [PATCH 282/333] Fix changing a role which is already expired in RoleCache. 
--- src/Access/RoleCache.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index 308b771243e..7a8c13636d2 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -163,11 +163,15 @@ void RoleCache::roleChanged(const UUID & role_id, const RolePtr & changed_role) std::lock_guard lock{mutex}; auto role_from_cache = cache.get(role_id); - if (!role_from_cache) - return; - role_from_cache->first = changed_role; - cache.update(role_id, role_from_cache); - collectEnabledRoles(¬ifications); + if (role_from_cache) + { + /// We update the role stored in a cache entry only if that entry has not expired yet. + role_from_cache->first = changed_role; + cache.update(role_id, role_from_cache); + } + + /// An enabled role for some users has been changed, we need to recalculate the access rights. + collectEnabledRoles(¬ifications); /// collectEnabledRoles() must be called with the `mutex` locked. } @@ -177,8 +181,12 @@ void RoleCache::roleRemoved(const UUID & role_id) scope_guard notifications; std::lock_guard lock{mutex}; + + /// If a cache entry with the role has expired already, that remove() will do nothing. cache.remove(role_id); - collectEnabledRoles(¬ifications); + + /// An enabled role for some users has been removed, we need to recalculate the access rights. + collectEnabledRoles(¬ifications); /// collectEnabledRoles() must be called with the `mutex` locked. } } From f8eb1fad91f0ea389e1a6f7b61270b07aa606f76 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Tue, 7 Mar 2023 23:06:14 +0100 Subject: [PATCH 283/333] Disable parallel format in health check --- tests/clickhouse-test | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index e30e709f363..c1103da1552 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -118,6 +118,7 @@ def clickhouse_execute_http( "http_connection_timeout": timeout, "http_receive_timeout": timeout, "http_send_timeout": timeout, + "output_format_parallel_formatting": 0, } if settings is not None: params.update(settings) From 9920a52c51d177bd18ffb9a5788b29c8af9a5ac1 Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Mon, 6 Mar 2023 17:53:58 +0300 Subject: [PATCH 284/333] use std::lerp, constexpr hex.h --- base/base/hex.h | 214 ++++++++++++++++++ base/base/interpolate.h | 13 ++ programs/install/Install.cpp | 2 +- src/Backups/BackupCoordinationRemote.cpp | 2 +- src/Backups/BackupImpl.cpp | 2 +- src/Backups/IBackupCoordination.h | 2 +- src/Columns/tests/gtest_weak_hash_32.cpp | 2 +- src/Common/BinStringDecodeHelper.h | 2 +- src/Common/OpenTelemetryTraceContext.cpp | 2 +- src/Common/SymbolIndex.cpp | 2 +- src/Common/escapeForFileName.cpp | 2 +- src/Common/formatIPv6.cpp | 2 +- src/Common/formatIPv6.h | 2 +- src/Common/getHashOfLoadedBinary.cpp | 2 +- src/Common/getMappedArea.cpp | 2 +- src/Common/hex.cpp | 92 -------- src/Common/hex.h | 145 ------------ src/Common/interpolate.h | 20 -- src/Compression/CompressedReadBufferBase.cpp | 2 +- src/Compression/CompressionCodecMultiple.cpp | 2 +- src/Coordination/KeeperDispatcher.cpp | 2 +- src/Coordination/KeeperStorage.cpp | 2 +- src/Daemon/SentryWriter.cpp | 2 +- .../IO/CachedOnDiskReadBufferFromFile.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 +- src/Formats/BSONTypes.cpp | 2 +- src/Formats/verbosePrintString.cpp | 2 +- src/Functions/FunctionsCodingIP.cpp | 2 +- src/Functions/FunctionsCodingUUID.cpp | 2 +- 
src/Functions/URL/decodeURLComponent.cpp | 2 +- src/Functions/bitShiftRight.cpp | 2 +- src/Functions/decodeXMLComponent.cpp | 2 +- src/IO/HTTPChunkedReadBuffer.cpp | 2 +- src/IO/ReadHelpers.cpp | 2 +- src/IO/WriteHelpers.cpp | 2 +- src/IO/tests/gtest_hadoop_snappy_decoder.cpp | 2 +- src/Interpreters/Cache/FileCacheKey.h | 2 +- src/Interpreters/Cache/FileSegment.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- src/Interpreters/OpenTelemetrySpanLog.cpp | 2 +- .../tests/gtest_lru_file_cache.cpp | 2 +- src/Parsers/Access/ParserCreateUserQuery.cpp | 2 +- src/Server/KeeperTCPHandler.cpp | 2 +- .../Cache/ExternalDataSourceCache.cpp | 2 +- .../DistributedAsyncInsertDirectoryQueue.cpp | 2 +- src/Storages/LiveView/LiveViewSink.h | 2 +- src/Storages/LiveView/StorageLiveView.cpp | 2 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 2 +- .../MergeTree/MergeTreeDataPartChecksum.cpp | 2 +- src/Storages/MergeTree/MergeTreePartition.cpp | 2 +- .../PartMetadataManagerWithCache.cpp | 2 +- .../MergeTree/SimpleMergeSelector.cpp | 10 +- .../MaterializedPostgreSQLConsumer.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/System/StorageSystemParts.cpp | 2 +- .../System/StorageSystemProjectionParts.cpp | 2 +- utils/checksum-for-compressed-block/main.cpp | 2 +- utils/wikistat-loader/main.cpp | 2 +- 58 files changed, 284 insertions(+), 314 deletions(-) create mode 100644 base/base/hex.h create mode 100644 base/base/interpolate.h delete mode 100644 src/Common/hex.cpp delete mode 100644 src/Common/hex.h delete mode 100644 src/Common/interpolate.h diff --git a/base/base/hex.h b/base/base/hex.h new file mode 100644 index 00000000000..e0c57f9dd42 --- /dev/null +++ b/base/base/hex.h @@ -0,0 +1,214 @@ +#pragma once + +#include +#include "types.h" + +/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly. 
+ +constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF"; +constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef"; + +constexpr char hexDigitUppercase(unsigned char c) +{ + return hex_digit_to_char_uppercase_table[c]; +} +constexpr char hexDigitLowercase(unsigned char c) +{ + return hex_digit_to_char_lowercase_table[c]; +} + +/// Maps 0..255 to 00..FF or 00..ff correspondingly + +constexpr inline std::string_view hex_byte_to_char_uppercase_table = // + "000102030405060708090A0B0C0D0E0F" + "101112131415161718191A1B1C1D1E1F" + "202122232425262728292A2B2C2D2E2F" + "303132333435363738393A3B3C3D3E3F" + "404142434445464748494A4B4C4D4E4F" + "505152535455565758595A5B5C5D5E5F" + "606162636465666768696A6B6C6D6E6F" + "707172737475767778797A7B7C7D7E7F" + "808182838485868788898A8B8C8D8E8F" + "909192939495969798999A9B9C9D9E9F" + "A0A1A2A3A4A5A6A7A8A9AAABACADAEAF" + "B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF" + "C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF" + "D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF" + "E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF" + "F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF"; + +constexpr inline std::string_view hex_byte_to_char_lowercase_table = // + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; + +inline void writeHexByteUppercase(UInt8 byte, void * out) +{ + memcpy(out, &hex_byte_to_char_uppercase_table[static_cast(byte) * 2], 2); +} + +inline void writeHexByteLowercase(UInt8 byte, void * out) +{ + memcpy(out, &hex_byte_to_char_lowercase_table[static_cast(byte) * 2], 2); +} + +constexpr inline std::string_view bin_byte_to_char_table = // + "0000000000000001000000100000001100000100000001010000011000000111" + "0000100000001001000010100000101100001100000011010000111000001111" + "0001000000010001000100100001001100010100000101010001011000010111" + "0001100000011001000110100001101100011100000111010001111000011111" + "0010000000100001001000100010001100100100001001010010011000100111" + "0010100000101001001010100010101100101100001011010010111000101111" + "0011000000110001001100100011001100110100001101010011011000110111" + "0011100000111001001110100011101100111100001111010011111000111111" + "0100000001000001010000100100001101000100010001010100011001000111" + "0100100001001001010010100100101101001100010011010100111001001111" + "0101000001010001010100100101001101010100010101010101011001010111" + "0101100001011001010110100101101101011100010111010101111001011111" + "0110000001100001011000100110001101100100011001010110011001100111" + "0110100001101001011010100110101101101100011011010110111001101111" + "0111000001110001011100100111001101110100011101010111011001110111" + "0111100001111001011110100111101101111100011111010111111001111111" + "1000000010000001100000101000001110000100100001011000011010000111" + "1000100010001001100010101000101110001100100011011000111010001111" + "1001000010010001100100101001001110010100100101011001011010010111" + "1001100010011001100110101001101110011100100111011001111010011111" + 
"1010000010100001101000101010001110100100101001011010011010100111" + "1010100010101001101010101010101110101100101011011010111010101111" + "1011000010110001101100101011001110110100101101011011011010110111" + "1011100010111001101110101011101110111100101111011011111010111111" + "1100000011000001110000101100001111000100110001011100011011000111" + "1100100011001001110010101100101111001100110011011100111011001111" + "1101000011010001110100101101001111010100110101011101011011010111" + "1101100011011001110110101101101111011100110111011101111011011111" + "1110000011100001111000101110001111100100111001011110011011100111" + "1110100011101001111010101110101111101100111011011110111011101111" + "1111000011110001111100101111001111110100111101011111011011110111" + "1111100011111001111110101111101111111100111111011111111011111111"; + +inline void writeBinByte(UInt8 byte, void * out) +{ + memcpy(out, &bin_byte_to_char_table[static_cast(byte) * 8], 8); +} + +/// Produces hex representation of an unsigned int with leading zeros (for checksums) +template +inline void writeHexUIntImpl(TUInt uint_, char * out, std::string_view table) +{ + union + { + TUInt value; + UInt8 uint8[sizeof(TUInt)]; + }; + + value = uint_; + + for (size_t i = 0; i < sizeof(TUInt); ++i) + { + if constexpr (std::endian::native == std::endian::little) + memcpy(out + i * 2, &table[static_cast(uint8[sizeof(TUInt) - 1 - i]) * 2], 2); + else + memcpy(out + i * 2, &table[static_cast(uint8[i]) * 2], 2); + } +} + +template +inline void writeHexUIntUppercase(TUInt uint_, char * out) +{ + writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table); +} + +template +inline void writeHexUIntLowercase(TUInt uint_, char * out) +{ + writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table); +} + +template +std::string getHexUIntUppercase(TUInt uint_) +{ + std::string res(sizeof(TUInt) * 2, '\0'); + writeHexUIntUppercase(uint_, res.data()); + return res; +} + +template +std::string getHexUIntLowercase(TUInt uint_) +{ + std::string res(sizeof(TUInt) * 2, '\0'); + writeHexUIntLowercase(uint_, res.data()); + return res; +} + +/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value. 
+ +constexpr inline std::string_view hex_char_to_digit_table + = {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9 + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", + 256}; + +constexpr UInt8 unhex(char c) +{ + return hex_char_to_digit_table[static_cast(c)]; +} + +constexpr UInt8 unhex2(const char * data) +{ + return static_cast(unhex(data[0])) * 0x10 + static_cast(unhex(data[1])); +} + +constexpr UInt16 unhex4(const char * data) +{ + return static_cast(unhex(data[0])) * 0x1000 + static_cast(unhex(data[1])) * 0x100 + + static_cast(unhex(data[2])) * 0x10 + static_cast(unhex(data[3])); +} + +template +constexpr TUInt unhexUInt(const char * data) +{ + TUInt res = 0; + if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0)) + { + for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data) + { + res <<= 4; + res += unhex(*data); + } + } + else + { + for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16) + { + res <<= 64; + res += unhexUInt(data); + } + } + return res; +} diff --git a/base/base/interpolate.h b/base/base/interpolate.h new file mode 100644 index 00000000000..1d4fc0b6257 --- /dev/null +++ b/base/base/interpolate.h @@ -0,0 +1,13 @@ +#pragma once +#include +#include + +/** Linear interpolation in logarithmic coordinates. + * Exponential interpolation is related to linear interpolation + * exactly in same way as geometric mean is related to arithmetic mean. 
+ */ +constexpr double interpolateExponential(double min, double max, double ratio) +{ + assert(min > 0 && ratio >= 0 && ratio <= 1); + return min * std::pow(max / min, ratio); +} diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index d568012bb26..80f3b0bbc63 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index c0fb4d5e066..8e43676f59c 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index b5f48a1a277..fb8abee814a 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index b75d856b50f..588a20d9eeb 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include diff --git a/src/Columns/tests/gtest_weak_hash_32.cpp b/src/Columns/tests/gtest_weak_hash_32.cpp index 5755cc3af72..cbf47790b9f 100644 --- a/src/Columns/tests/gtest_weak_hash_32.cpp +++ b/src/Columns/tests/gtest_weak_hash_32.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/src/Common/BinStringDecodeHelper.h b/src/Common/BinStringDecodeHelper.h index 513a4196b6f..df3e014cfad 100644 --- a/src/Common/BinStringDecodeHelper.h +++ b/src/Common/BinStringDecodeHelper.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index b62822ceda2..df4ee6a34bf 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index 6f31009b1d2..f1cace5017c 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -1,7 +1,7 @@ #if defined(__ELF__) && !defined(OS_FREEBSD) #include -#include +#include #include #include diff --git a/src/Common/escapeForFileName.cpp b/src/Common/escapeForFileName.cpp index bcca04706dc..a1f9bff28d0 100644 --- a/src/Common/escapeForFileName.cpp +++ b/src/Common/escapeForFileName.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/Common/formatIPv6.cpp b/src/Common/formatIPv6.cpp index 7c027a23b4d..86e33beb7c3 100644 --- a/src/Common/formatIPv6.cpp +++ b/src/Common/formatIPv6.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index 7b88f93750b..be4dfc7391e 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include constexpr size_t IPV4_BINARY_LENGTH = 4; diff --git a/src/Common/getHashOfLoadedBinary.cpp b/src/Common/getHashOfLoadedBinary.cpp index da053750036..cc0ad0d2143 100644 --- a/src/Common/getHashOfLoadedBinary.cpp +++ b/src/Common/getHashOfLoadedBinary.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include static int callback(dl_phdr_info * info, size_t, void * data) diff --git 
a/src/Common/getMappedArea.cpp b/src/Common/getMappedArea.cpp index 573d3194f3d..4f40c604c6a 100644 --- a/src/Common/getMappedArea.cpp +++ b/src/Common/getMappedArea.cpp @@ -4,7 +4,7 @@ #if defined(OS_LINUX) #include -#include +#include #include #include diff --git a/src/Common/hex.cpp b/src/Common/hex.cpp deleted file mode 100644 index e8f9b981062..00000000000 --- a/src/Common/hex.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include - -const char * const hex_digit_to_char_uppercase_table = "0123456789ABCDEF"; -const char * const hex_digit_to_char_lowercase_table = "0123456789abcdef"; - -const char * const hex_byte_to_char_uppercase_table = - "000102030405060708090A0B0C0D0E0F" - "101112131415161718191A1B1C1D1E1F" - "202122232425262728292A2B2C2D2E2F" - "303132333435363738393A3B3C3D3E3F" - "404142434445464748494A4B4C4D4E4F" - "505152535455565758595A5B5C5D5E5F" - "606162636465666768696A6B6C6D6E6F" - "707172737475767778797A7B7C7D7E7F" - "808182838485868788898A8B8C8D8E8F" - "909192939495969798999A9B9C9D9E9F" - "A0A1A2A3A4A5A6A7A8A9AAABACADAEAF" - "B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF" - "C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF" - "D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF" - "E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF" - "F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF"; - -const char * const hex_byte_to_char_lowercase_table = - "000102030405060708090a0b0c0d0e0f" - "101112131415161718191a1b1c1d1e1f" - "202122232425262728292a2b2c2d2e2f" - "303132333435363738393a3b3c3d3e3f" - "404142434445464748494a4b4c4d4e4f" - "505152535455565758595a5b5c5d5e5f" - "606162636465666768696a6b6c6d6e6f" - "707172737475767778797a7b7c7d7e7f" - "808182838485868788898a8b8c8d8e8f" - "909192939495969798999a9b9c9d9e9f" - "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" - "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" - "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" - "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" - "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" - "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; - -const char * const hex_char_to_digit_table = - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9 - "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"; - -const char * const bin_byte_to_char_table = - "0000000000000001000000100000001100000100000001010000011000000111" - "0000100000001001000010100000101100001100000011010000111000001111" - "0001000000010001000100100001001100010100000101010001011000010111" - "0001100000011001000110100001101100011100000111010001111000011111" - "0010000000100001001000100010001100100100001001010010011000100111" - "0010100000101001001010100010101100101100001011010010111000101111" - 
"0011000000110001001100100011001100110100001101010011011000110111" - "0011100000111001001110100011101100111100001111010011111000111111" - "0100000001000001010000100100001101000100010001010100011001000111" - "0100100001001001010010100100101101001100010011010100111001001111" - "0101000001010001010100100101001101010100010101010101011001010111" - "0101100001011001010110100101101101011100010111010101111001011111" - "0110000001100001011000100110001101100100011001010110011001100111" - "0110100001101001011010100110101101101100011011010110111001101111" - "0111000001110001011100100111001101110100011101010111011001110111" - "0111100001111001011110100111101101111100011111010111111001111111" - "1000000010000001100000101000001110000100100001011000011010000111" - "1000100010001001100010101000101110001100100011011000111010001111" - "1001000010010001100100101001001110010100100101011001011010010111" - "1001100010011001100110101001101110011100100111011001111010011111" - "1010000010100001101000101010001110100100101001011010011010100111" - "1010100010101001101010101010101110101100101011011010111010101111" - "1011000010110001101100101011001110110100101101011011011010110111" - "1011100010111001101110101011101110111100101111011011111010111111" - "1100000011000001110000101100001111000100110001011100011011000111" - "1100100011001001110010101100101111001100110011011100111011001111" - "1101000011010001110100101101001111010100110101011101011011010111" - "1101100011011001110110101101101111011100110111011101111011011111" - "1110000011100001111000101110001111100100111001011110011011100111" - "1110100011101001111010101110101111101100111011011110111011101111" - "1111000011110001111100101111001111110100111101011111011011110111" - "1111100011111001111110101111101111111100111111011111111011111111"; diff --git a/src/Common/hex.h b/src/Common/hex.h deleted file mode 100644 index 062a6c27f76..00000000000 --- a/src/Common/hex.h +++ /dev/null @@ -1,145 +0,0 @@ -#pragma once -#include - - -/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly. 
- -extern const char * const hex_digit_to_char_uppercase_table; -extern const char * const hex_digit_to_char_lowercase_table; - -inline char hexDigitUppercase(unsigned char c) -{ - return hex_digit_to_char_uppercase_table[c]; -} - -inline char hexDigitLowercase(unsigned char c) -{ - return hex_digit_to_char_lowercase_table[c]; -} - - -#include -#include - -#include - - -/// Maps 0..255 to 00..FF or 00..ff correspondingly - -extern const char * const hex_byte_to_char_uppercase_table; -extern const char * const hex_byte_to_char_lowercase_table; - -inline void writeHexByteUppercase(UInt8 byte, void * out) -{ - memcpy(out, &hex_byte_to_char_uppercase_table[static_cast(byte) * 2], 2); -} - -inline void writeHexByteLowercase(UInt8 byte, void * out) -{ - memcpy(out, &hex_byte_to_char_lowercase_table[static_cast(byte) * 2], 2); -} - -extern const char * const bin_byte_to_char_table; - -inline void writeBinByte(UInt8 byte, void * out) -{ - memcpy(out, &bin_byte_to_char_table[static_cast(byte) * 8], 8); -} - -/// Produces hex representation of an unsigned int with leading zeros (for checksums) -template -inline void writeHexUIntImpl(TUInt uint_, char * out, const char * const table) -{ - union - { - TUInt value; - UInt8 uint8[sizeof(TUInt)]; - }; - - value = uint_; - - for (size_t i = 0; i < sizeof(TUInt); ++i) - { - if constexpr (std::endian::native == std::endian::little) - memcpy(out + i * 2, &table[static_cast(uint8[sizeof(TUInt) - 1 - i]) * 2], 2); - else - memcpy(out + i * 2, &table[static_cast(uint8[i]) * 2], 2); - } -} - -template -inline void writeHexUIntUppercase(TUInt uint_, char * out) -{ - writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table); -} - -template -inline void writeHexUIntLowercase(TUInt uint_, char * out) -{ - writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table); -} - -template -std::string getHexUIntUppercase(TUInt uint_) -{ - std::string res(sizeof(TUInt) * 2, '\0'); - writeHexUIntUppercase(uint_, res.data()); - return res; -} - -template -std::string getHexUIntLowercase(TUInt uint_) -{ - std::string res(sizeof(TUInt) * 2, '\0'); - writeHexUIntLowercase(uint_, res.data()); - return res; -} - - -/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value. - -extern const char * const hex_char_to_digit_table; - -inline UInt8 unhex(char c) -{ - return hex_char_to_digit_table[static_cast(c)]; -} - -inline UInt8 unhex2(const char * data) -{ - return - static_cast(unhex(data[0])) * 0x10 - + static_cast(unhex(data[1])); -} - -inline UInt16 unhex4(const char * data) -{ - return - static_cast(unhex(data[0])) * 0x1000 - + static_cast(unhex(data[1])) * 0x100 - + static_cast(unhex(data[2])) * 0x10 - + static_cast(unhex(data[3])); -} - -template -TUInt unhexUInt(const char * data) -{ - TUInt res = 0; - if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0)) - { - for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data) - { - res <<= 4; - res += unhex(*data); - } - } - else - { - for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16) - { - res <<= 64; - res += unhexUInt(data); - } - } - return res; -} diff --git a/src/Common/interpolate.h b/src/Common/interpolate.h deleted file mode 100644 index 05900563b80..00000000000 --- a/src/Common/interpolate.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include - - -inline double interpolateLinear(double min, double max, double ratio) -{ - return min + (max - min) * ratio; -} - - -/** It is linear interpolation in logarithmic coordinates. 
- * Exponential interpolation is related to linear interpolation - * exactly in same way as geometric mean is related to arithmetic mean. - * 'min' must be greater than zero, 'ratio' must be from 0 to 1. - */ -inline double interpolateExponential(double min, double max, double ratio) -{ - return min * std::pow(max / min, ratio); -} diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index ab856cc9801..3111f649b26 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Compression/CompressionCodecMultiple.cpp b/src/Compression/CompressionCodecMultiple.cpp index 5203e349317..dba67749e4d 100644 --- a/src/Compression/CompressionCodecMultiple.cpp +++ b/src/Compression/CompressionCodecMultiple.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index a6d16334924..2aa11dd9eed 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 33b2a91d8bf..41a6af54204 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index 9f4f18e64d1..3c62e54b117 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "config.h" #include "config_version.h" diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 44a719c82bb..72346787cfb 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 43b6544acb0..8450e740ab5 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Formats/BSONTypes.cpp b/src/Formats/BSONTypes.cpp index 813c155325a..88396fd2aba 100644 --- a/src/Formats/BSONTypes.cpp +++ b/src/Formats/BSONTypes.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Formats/verbosePrintString.cpp b/src/Formats/verbosePrintString.cpp index 2f3e09ed75f..5c6111c2929 100644 --- a/src/Formats/verbosePrintString.cpp +++ b/src/Formats/verbosePrintString.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include diff --git a/src/Functions/FunctionsCodingIP.cpp b/src/Functions/FunctionsCodingIP.cpp index a941092b7d6..4784368db9b 100644 --- a/src/Functions/FunctionsCodingIP.cpp +++ b/src/Functions/FunctionsCodingIP.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/FunctionsCodingUUID.cpp b/src/Functions/FunctionsCodingUUID.cpp index dade406c801..dd9170e44ad 100644 --- a/src/Functions/FunctionsCodingUUID.cpp +++ b/src/Functions/FunctionsCodingUUID.cpp @@ 
-3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Functions/URL/decodeURLComponent.cpp b/src/Functions/URL/decodeURLComponent.cpp index 9e516e73e3c..7d98ccd63a0 100644 --- a/src/Functions/URL/decodeURLComponent.cpp +++ b/src/Functions/URL/decodeURLComponent.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 108847f13ed..21a0f7584aa 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Functions/decodeXMLComponent.cpp b/src/Functions/decodeXMLComponent.cpp index 8b84bb1194e..a25e67e0e37 100644 --- a/src/Functions/decodeXMLComponent.cpp +++ b/src/Functions/decodeXMLComponent.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include diff --git a/src/IO/HTTPChunkedReadBuffer.cpp b/src/IO/HTTPChunkedReadBuffer.cpp index b9c42088c41..65ccad4aab7 100644 --- a/src/IO/HTTPChunkedReadBuffer.cpp +++ b/src/IO/HTTPChunkedReadBuffer.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 86a2b9c650e..e14b3ae9129 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index caeea0a82a3..a0eceddc6f6 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/IO/tests/gtest_hadoop_snappy_decoder.cpp b/src/IO/tests/gtest_hadoop_snappy_decoder.cpp index 4db0deac08e..2847c730735 100644 --- a/src/IO/tests/gtest_hadoop_snappy_decoder.cpp +++ b/src/IO/tests/gtest_hadoop_snappy_decoder.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include using namespace DB; TEST(HadoopSnappyDecoder, repeatNeedMoreInput) { diff --git a/src/Interpreters/Cache/FileCacheKey.h b/src/Interpreters/Cache/FileCacheKey.h index fed4c7f47e0..67e1466e2d4 100644 --- a/src/Interpreters/Cache/FileCacheKey.h +++ b/src/Interpreters/Cache/FileCacheKey.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include namespace DB diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index f4d7b2612a5..bd4554c6532 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 29b7a4db609..c352280b7ed 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index 57d5c11ad97..63b8ae406a6 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index 93faafb5cea..62aef2441d6 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -9,7 +9,7 @@ #include #include 
#include -#include +#include #include #include #include diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index de83c5760c1..c1d0691d305 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -17,7 +17,7 @@ #include #include #include "config.h" -#include +#include #if USE_SSL # include # include diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 0853c6ee62b..f9e11062906 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #ifdef POCO_HAVE_FD_EPOLL diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp index 56b2e661836..1fc68a2d774 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.cpp +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index c6f675533c6..b47028b883a 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index 1d90e35618f..e163400f2af 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 547becf3837..b24f5fb2c63 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -27,7 +27,7 @@ limitations under the License. 
*/ #include #include #include -#include +#include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 0d123623f05..cf009a10c27 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 8f4d066baa3..7a0b1d03e79 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -1,6 +1,6 @@ #include "MergeTreeDataPartChecksum.h" #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 3a6908ef32d..3b28012e7d6 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp index b6260d5edb6..3a53cf25745 100644 --- a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp +++ b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp @@ -1,7 +1,7 @@ #include "PartMetadataManagerWithCache.h" #if USE_ROCKSDB -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/SimpleMergeSelector.cpp index 15291622a2a..af3373fd175 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include @@ -28,7 +28,7 @@ struct Estimator { double difference = std::abs(log2(static_cast(sum_size) / size_prev_at_left)); if (difference < settings.heuristic_to_align_parts_max_absolute_difference_in_powers_of_two) - current_score *= interpolateLinear(settings.heuristic_to_align_parts_max_score_adjustment, 1, + current_score *= std::lerp(settings.heuristic_to_align_parts_max_score_adjustment, 1, difference / settings.heuristic_to_align_parts_max_absolute_difference_in_powers_of_two); } @@ -115,8 +115,8 @@ bool allow( // std::cerr << "size_normalized: " << size_normalized << "\n"; /// Calculate boundaries for age - double min_age_to_lower_base = interpolateLinear(settings.min_age_to_lower_base_at_min_size, settings.min_age_to_lower_base_at_max_size, size_normalized); - double max_age_to_lower_base = interpolateLinear(settings.max_age_to_lower_base_at_min_size, settings.max_age_to_lower_base_at_max_size, size_normalized); + double min_age_to_lower_base = std::lerp(settings.min_age_to_lower_base_at_min_size, settings.min_age_to_lower_base_at_max_size, size_normalized); + double max_age_to_lower_base = std::lerp(settings.max_age_to_lower_base_at_min_size, settings.max_age_to_lower_base_at_max_size, size_normalized); // std::cerr << "min_age_to_lower_base: " << min_age_to_lower_base << "\n"; // std::cerr << "max_age_to_lower_base: " << max_age_to_lower_base << "\n"; @@ -137,7 +137,7 @@ bool allow( // std::cerr << "combined_ratio: " << combined_ratio << "\n"; - double lowered_base = interpolateLinear(settings.base, 2.0, combined_ratio); + double lowered_base = std::lerp(settings.base, 2.0, combined_ratio); // std::cerr << "------- lowered_base: " << 
lowered_base << "\n"; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 9c6eeceb605..d048c94ac75 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -2,7 +2,7 @@ #include "StorageMaterializedPostgreSQL.h" #include -#include +#include #include #include #include diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 54ae8aa5a7b..4c0aced0b48 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index f6854e7d5d0..86ecb336b51 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index 37c62ba5eb0..d2c6c3ef287 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/utils/checksum-for-compressed-block/main.cpp b/utils/checksum-for-compressed-block/main.cpp index 27a2154340e..4f9923e7638 100644 --- a/utils/checksum-for-compressed-block/main.cpp +++ b/utils/checksum-for-compressed-block/main.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include /** A tool to easily prove if "Checksum doesn't match: corrupted data" diff --git a/utils/wikistat-loader/main.cpp b/utils/wikistat-loader/main.cpp index 31ade014c74..493f1df05da 100644 --- a/utils/wikistat-loader/main.cpp +++ b/utils/wikistat-loader/main.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include From 429696b575a1eaa4d2206a7b0cd7d925060cc6c6 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 7 Mar 2023 22:57:47 +0000 Subject: [PATCH 285/333] Update reference file --- .../00173_group_by_use_nulls.reference | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/queries/1_stateful/00173_group_by_use_nulls.reference b/tests/queries/1_stateful/00173_group_by_use_nulls.reference index 02723bf14dd..e82b996ad3c 100644 --- a/tests/queries/1_stateful/00173_group_by_use_nulls.reference +++ b/tests/queries/1_stateful/00173_group_by_use_nulls.reference @@ -8,3 +8,25 @@ 59183 1336 33010362 1336 800784 1336 +-- { echoOn } +set allow_experimental_analyzer = 1; +SELECT + CounterID AS k, + quantileBFloat16(0.5)(ResolutionWidth) +FROM remote('127.0.0.{1,2}', test, hits) +GROUP BY k +ORDER BY + count() DESC, + CounterID ASC +LIMIT 10 +SETTINGS group_by_use_nulls = 1; +1704509 1384 +732797 1336 +598875 1384 +792887 1336 +3807842 1336 +25703952 1336 +716829 1384 +59183 1336 +33010362 1336 +800784 1336 From f5cf039190d547f52d718b2a16403ce8eecad8ab Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 7 Mar 2023 21:27:27 +0100 Subject: [PATCH 286/333] check if a symlink exists more carefully --- src/Common/filesystemHelpers.cpp | 8 ++++++++ src/Common/filesystemHelpers.h | 1 + src/Databases/DatabaseAtomic.cpp | 3 +++ 3 files changed, 12 insertions(+) diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 
6e1b5573bef..eabc7bdafbb 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -383,6 +383,14 @@ bool isSymlink(const fs::path & path) return fs::is_symlink(path); /// STYLE_CHECK_ALLOW_STD_FS_SYMLINK } +bool isSymlinkNoThrow(const fs::path & path) +{ + std::error_code dummy; + if (path.filename().empty()) + return fs::is_symlink(path.parent_path(), dummy); /// STYLE_CHECK_ALLOW_STD_FS_SYMLINK + return fs::is_symlink(path, dummy); /// STYLE_CHECK_ALLOW_STD_FS_SYMLINK +} + fs::path readSymlink(const fs::path & path) { /// See the comment for isSymlink diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index 14ee5f54322..8591cd6cf92 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -95,6 +95,7 @@ void setModificationTime(const std::string & path, time_t time); time_t getChangeTime(const std::string & path); bool isSymlink(const fs::path & path); +bool isSymlinkNoThrow(const fs::path & path); fs::path readSymlink(const fs::path & path); } diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 34c4fd3d5d8..d8e746f01f9 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -509,6 +509,9 @@ void DatabaseAtomic::tryCreateMetadataSymlink() { try { + /// fs::exists could return false for broken symlink + if (FS::isSymlinkNoThrow(path_to_metadata_symlink)) + fs::remove(path_to_metadata_symlink); fs::create_directory_symlink(metadata_path, path_to_metadata_symlink); } catch (...) From 51edcf232cf2962d51d57f808ab6cea42e28327d Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 8 Mar 2023 00:52:58 +0000 Subject: [PATCH 287/333] Add support for big int types to runningDifference() function --- src/Functions/runningDifference.h | 10 +++++++- .../queries/0_stateless/00534_filimonov.data | 8 +++++++ .../00653_running_difference.reference | 24 +++++++++++++++++++ .../0_stateless/00653_running_difference.sql | 8 +++++++ 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/Functions/runningDifference.h b/src/Functions/runningDifference.h index 154370d4cd9..f1ec4f9e523 100644 --- a/src/Functions/runningDifference.h +++ b/src/Functions/runningDifference.h @@ -70,7 +70,7 @@ private: if (!has_prev_value) { - dst[i] = is_first_line_zero ? 0 : src[i]; + dst[i] = is_first_line_zero ?
static_cast(0) : static_cast(src[i]); prev = src[i]; has_prev_value = true; } @@ -102,6 +102,10 @@ private: f(UInt32()); else if (which.isUInt64()) f(UInt64()); + else if (which.isUInt128()) + f(UInt128()); + else if (which.isUInt256()) + f(UInt256()); else if (which.isInt8()) f(Int8()); else if (which.isInt16()) @@ -110,6 +114,10 @@ private: f(Int32()); else if (which.isInt64()) f(Int64()); + else if (which.isInt128()) + f(Int128()); + else if (which.isInt256()) + f(Int256()); else if (which.isFloat32()) f(Float32()); else if (which.isFloat64()) diff --git a/tests/queries/0_stateless/00534_filimonov.data b/tests/queries/0_stateless/00534_filimonov.data index 911a8e4d1f3..eb4500877e5 100644 --- a/tests/queries/0_stateless/00534_filimonov.data +++ b/tests/queries/0_stateless/00534_filimonov.data @@ -276,10 +276,14 @@ SELECT runningDifference(CAST( 0 AS Nullable(Int8))); SELECT runningDifference(CAST( 0 AS Nullable(Int16))); SELECT runningDifference(CAST( 0 AS Nullable(Int32))); SELECT runningDifference(CAST( 0 AS Nullable(Int64))); +SELECT runningDifference(CAST( 0 AS Nullable(Int128))); +SELECT runningDifference(CAST( 0 AS Nullable(Int256))); SELECT runningDifference(CAST( 0 AS Nullable(UInt8))); SELECT runningDifference(CAST( 0 AS Nullable(UInt16))); SELECT runningDifference(CAST( 0 AS Nullable(UInt32))); SELECT runningDifference(CAST( 0 AS Nullable(UInt64))); +SELECT runningDifference(CAST( 0 AS Nullable(UInt128))); +SELECT runningDifference(CAST( 0 AS Nullable(UInt256))); SELECT runningDifference(CAST( 0 AS Nullable(Float32))); SELECT runningDifference(CAST( 0 AS Nullable(Float64))); SELECT runningDifference(CAST( 0 AS Nullable(Date))); @@ -288,10 +292,14 @@ SELECT runningDifference(CAST(NULL AS Nullable(Int8))); SELECT runningDifference(CAST(NULL AS Nullable(Int16))); SELECT runningDifference(CAST(NULL AS Nullable(Int32))); SELECT runningDifference(CAST(NULL AS Nullable(Int64))); +SELECT runningDifference(CAST(NULL AS Nullable(Int128))); +SELECT runningDifference(CAST(NULL AS Nullable(Int256))); SELECT runningDifference(CAST(NULL AS Nullable(UInt8))); SELECT runningDifference(CAST(NULL AS Nullable(UInt16))); SELECT runningDifference(CAST(NULL AS Nullable(UInt32))); SELECT runningDifference(CAST(NULL AS Nullable(UInt64))); +SELECT runningDifference(CAST(NULL AS Nullable(UInt128))); +SELECT runningDifference(CAST(NULL AS Nullable(UInt256))); SELECT runningDifference(CAST(NULL AS Nullable(Float32))); SELECT runningDifference(CAST(NULL AS Nullable(Float64))); SELECT runningDifference(CAST(NULL AS Nullable(Date))); diff --git a/tests/queries/0_stateless/00653_running_difference.reference b/tests/queries/0_stateless/00653_running_difference.reference index e2833e0bb3e..624ce92ce0f 100644 --- a/tests/queries/0_stateless/00653_running_difference.reference +++ b/tests/queries/0_stateless/00653_running_difference.reference @@ -19,6 +19,30 @@ \N \N 2 +- +0 +1 +4 +5 +170141183460469231731687303715884105717 +- +0 +1 +4 +5 +170141183460469231731687303715884105718 +- +0 +1 +4 +5 +170141183460469231731687303715884105717 +- +0 +1 +4 +5 +170141183460469231731687303715884105718 --Date Difference-- \N \N diff --git a/tests/queries/0_stateless/00653_running_difference.sql b/tests/queries/0_stateless/00653_running_difference.sql index f2b4a7300b2..1f18cfc42a7 100644 --- a/tests/queries/0_stateless/00653_running_difference.sql +++ b/tests/queries/0_stateless/00653_running_difference.sql @@ -5,6 +5,14 @@ select '-'; select runningDifference(x) from (select arrayJoin([Null, 1]) as x); select '-'; select
runningDifference(x) from (select arrayJoin([Null, Null, 1, 3, Null, Null, 5]) as x); +select '-'; +select runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105727]::Array(UInt128)) as x); +select '-'; +select runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105728]::Array(UInt256)) as x); +select '-'; +select runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105727]::Array(Int128)) as x); +select '-'; +select runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105728]::Array(Int256)) as x); select '--Date Difference--'; select runningDifference(x) from (select arrayJoin([Null, Null, toDate('1970-1-1'), toDate('1970-12-31'), Null, Null, toDate('2010-8-9')]) as x); select '-'; From 927efc0f2c0e164a2884c0d2ec087fa7dfbae7c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=5B=EB=8D=B0=EC=9D=B4=ED=84=B0=ED=94=8C=EB=9E=AB=ED=8F=BC?= =?UTF-8?q?=ED=8C=80=5D=20=EC=9D=B4=ED=98=B8=EC=84=A0?= Date: Wed, 8 Mar 2023 14:01:38 +0900 Subject: [PATCH 288/333] Fix git clone command. Insert space in front of shallow option. --- docs/en/development/developer-instruction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index b46cc10f99d..ace5ab79bb4 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -67,7 +67,7 @@ It generally means that the SSH keys for connecting to GitHub are missing. These You can also clone the repository via https protocol: - git clone --recursive--shallow-submodules https://github.com/ClickHouse/ClickHouse.git + git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHouse.git This, however, will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later replacing the remote address of the repository with `git remote` command. From ce809e61fb18eb6b05105ffe2fef03124996ba75 Mon Sep 17 00:00:00 2001 From: Hosun Lee Date: Wed, 8 Mar 2023 14:09:18 +0900 Subject: [PATCH 289/333] Revert commit. --- docs/en/development/developer-instruction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index ace5ab79bb4..94834f41999 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -39,7 +39,7 @@ Next, you need to download the source files onto your working machine. This is c In the command line terminal run: - git clone --recursive --shallow-submodules git@github.com:your_github_username/ClickHouse.git + git clone --recursive--shallow-submodules git@github.com:your_github_username/ClickHouse.git cd ClickHouse Note: please, substitute *your_github_username* with what is appropriate! From aae82a73aa7ae838cdbecda5f1a305186de2fd47 Mon Sep 17 00:00:00 2001 From: Hosun Lee Date: Wed, 8 Mar 2023 14:10:12 +0900 Subject: [PATCH 290/333] Fix git clone command. Insert space in front of shallow option. 
--- docs/en/development/developer-instruction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 94834f41999..ace5ab79bb4 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -39,7 +39,7 @@ Next, you need to download the source files onto your working machine. This is c In the command line terminal run: - git clone --recursive--shallow-submodules git@github.com:your_github_username/ClickHouse.git + git clone --recursive --shallow-submodules git@github.com:your_github_username/ClickHouse.git cd ClickHouse Note: please, substitute *your_github_username* with what is appropriate! From 7966c114bdbbc97f98430a44f83c41c7bfcc0d9f Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 8 Mar 2023 05:40:10 +0000 Subject: [PATCH 291/333] analyzer - fix combine logic for limit expression and limit setting --- src/Analyzer/QueryTreeBuilder.cpp | 53 +++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index c7b9f9aae08..ed956071d5a 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -355,21 +355,56 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q if (select_limit_by) current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context); - /// Combine limit expression with limit setting + /// Combine limit expression with limit and offset settings into final limit expression + /// select_limit - limit expression + /// limit - limit setting + /// offset - offset setting + /// + /// if select_limit + /// -- if offset >= select_limit (expr 0) + /// then (0) (0 rows) + /// -- else if limit > 0 (expr 1) + /// then min(select_limit - offset, limit) (expr 2) + /// -- else + /// then (select_limit - offset) (expr 3) + /// else if limit > 0 + /// then limit + /// + /// offset = offset + of_expr auto select_limit = select_query_typed.limitLength(); - if (select_limit && limit) + if (select_limit) { - auto function_node = std::make_shared("least"); - function_node->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); - function_node->getArguments().getNodes().push_back(std::make_shared(limit)); + /// expr 3 + auto expr_3 = std::make_shared("minus"); + expr_3->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); + expr_3->getArguments().getNodes().push_back(std::make_shared(offset)); + + /// expr 2 + auto expr_2 = std::make_shared("least"); + expr_2->getArguments().getNodes().push_back(expr_3->clone()); + expr_2->getArguments().getNodes().push_back(std::make_shared(limit)); + + /// expr 0 + auto expr_0 = std::make_shared("greaterOrEquals"); + expr_0->getArguments().getNodes().push_back(std::make_shared(offset)); + expr_0->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); + + /// expr 1 + auto expr_1 = std::make_shared(limit > 0); + + auto function_node = std::make_shared("multiIf"); + function_node->getArguments().getNodes().push_back(expr_0); + function_node->getArguments().getNodes().push_back(std::make_shared(0)); + function_node->getArguments().getNodes().push_back(expr_1); + function_node->getArguments().getNodes().push_back(expr_2); + function_node->getArguments().getNodes().push_back(expr_3); + current_query_tree->getLimit() = 
std::move(function_node); } - else if (limit) + else if (limit > 0) current_query_tree->getLimit() = std::make_shared(limit); - else if (select_limit) - current_query_tree->getLimit() = buildExpression(select_limit, current_context); - /// Combine offset expression with offset setting + /// Combine offset expression with offset setting into final offset expression auto select_offset = select_query_typed.limitOffset(); if (select_offset && offset) { From 036fb0b9efb6d8aef2bc2837fc6696f8ca39c4e2 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 8 Mar 2023 05:52:04 +0000 Subject: [PATCH 292/333] analyzer - add test --- .../02676_analyzer_limit_offset.reference | 63 +++++++++++++++++++ .../02676_analyzer_limit_offset.sql | 34 ++++++++++ 2 files changed, 97 insertions(+) create mode 100644 tests/queries/0_stateless/02676_analyzer_limit_offset.reference create mode 100644 tests/queries/0_stateless/02676_analyzer_limit_offset.sql diff --git a/tests/queries/0_stateless/02676_analyzer_limit_offset.reference b/tests/queries/0_stateless/02676_analyzer_limit_offset.reference new file mode 100644 index 00000000000..96483268d43 --- /dev/null +++ b/tests/queries/0_stateless/02676_analyzer_limit_offset.reference @@ -0,0 +1,63 @@ +0 +1 +2 +3 +4 +15 +15 +16 +16 +17 +30 +30 +31 +31 +32 +102 +103 +104 +105 +105 +106 +107 +108 +109 +105 +106 +107 +108 +109 +60 +60 +61 +61 +62 +62 +63 +63 +64 +64 +60 +35 +35 +36 +36 +37 +37 +38 +38 +39 +39 +105 +106 +107 +108 +109 +12 +13 +13 +14 +14 +15 +15 +16 diff --git a/tests/queries/0_stateless/02676_analyzer_limit_offset.sql b/tests/queries/0_stateless/02676_analyzer_limit_offset.sql new file mode 100644 index 00000000000..39c6b85f088 --- /dev/null +++ b/tests/queries/0_stateless/02676_analyzer_limit_offset.sql @@ -0,0 +1,34 @@ +set allow_experimental_analyzer=1; + +DROP TABLE IF EXISTS test; +CREATE TABLE test (i UInt64) Engine = MergeTree() order by i; +INSERT INTO test SELECT number FROM numbers(100); +INSERT INTO test SELECT number FROM numbers(10,100); +OPTIMIZE TABLE test FINAL; + +-- Only set limit +SET limit = 5; +SELECT * FROM test; -- 5 rows +SELECT * FROM test OFFSET 20; -- 5 rows +SELECT * FROM (SELECT i FROM test LIMIT 10 OFFSET 50) TMP; -- 5 rows +SELECT * FROM test LIMIT 4 OFFSET 192; -- 4 rows +SELECT * FROM test LIMIT 10 OFFSET 195; -- 5 rows + +-- Only set offset +SET limit = 0; +SET offset = 195; +SELECT * FROM test; -- 5 rows +SELECT * FROM test OFFSET 20; -- no result +SELECT * FROM test LIMIT 100; -- no result +SET offset = 10; +SELECT * FROM test LIMIT 20 OFFSET 100; -- 10 rows +SELECT * FROM test LIMIT 11 OFFSET 100; -- 1 rows + +-- offset and limit together +SET limit = 10; +SELECT * FROM test LIMIT 50 OFFSET 50; -- 10 rows +SELECT * FROM test LIMIT 50 OFFSET 190; -- 0 rows +SELECT * FROM test LIMIT 50 OFFSET 185; -- 5 rows +SELECT * FROM test LIMIT 18 OFFSET 5; -- 8 rows + +DROP TABLE test; From 31b7e6edc64689d1d682506530a426c8f06ae1b6 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 7 Mar 2023 21:21:59 +0100 Subject: [PATCH 293/333] Keep subscriptions for all enabled roles. 
--- src/Access/EnabledRoles.h | 5 +-- src/Access/RoleCache.cpp | 71 +++++++++++++++++++++++++-------------- src/Access/RoleCache.h | 24 ++++++++++--- 3 files changed, 67 insertions(+), 33 deletions(-) diff --git a/src/Access/EnabledRoles.h b/src/Access/EnabledRoles.h index e0d773db343..5de76abe409 100644 --- a/src/Access/EnabledRoles.h +++ b/src/Access/EnabledRoles.h @@ -44,10 +44,11 @@ private: friend class RoleCache; explicit EnabledRoles(const Params & params_); - void setRolesInfo(const std::shared_ptr & info_, scope_guard * notifications); - const Params params; + /// Called by RoleCache to store `EnabledRolesInfo` in this `EnabledRoles` after the calculation is done. + void setRolesInfo(const std::shared_ptr & info_, scope_guard * notifications); + std::shared_ptr info; mutable std::mutex info_mutex; diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index 7a8c13636d2..bfc6200929d 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -57,7 +57,9 @@ namespace RoleCache::RoleCache(const AccessControl & access_control_) - : access_control(access_control_), cache(600000 /* 10 minutes */) {} + : access_control(access_control_), cache(600000 /* 10 minutes */) +{ +} RoleCache::~RoleCache() = default; @@ -70,18 +72,18 @@ RoleCache::getEnabledRoles(const std::vector & roles, const std::vectorsecond.lock(); - if (from_cache) - return from_cache; - enabled_roles.erase(it); + if (auto enabled_roles = it->second.enabled_roles.lock()) + return enabled_roles; + enabled_roles_by_params.erase(it); } auto res = std::shared_ptr(new EnabledRoles(params)); - collectEnabledRoles(*res, nullptr); - enabled_roles.emplace(std::move(params), res); + SubscriptionsOnRoles subscriptions_on_roles; + collectEnabledRoles(*res, subscriptions_on_roles, nullptr); + enabled_roles_by_params.emplace(std::move(params), EnabledRolesWithSubscriptions{res, std::move(subscriptions_on_roles)}); return res; } @@ -90,21 +92,23 @@ void RoleCache::collectEnabledRoles(scope_guard * notifications) { /// `mutex` is already locked. - for (auto i = enabled_roles.begin(), e = enabled_roles.end(); i != e;) + for (auto i = enabled_roles_by_params.begin(), e = enabled_roles_by_params.end(); i != e;) { - auto elem = i->second.lock(); - if (!elem) - i = enabled_roles.erase(i); + auto & item = i->second; + if (auto enabled_roles = item.enabled_roles.lock()) + { + collectEnabledRoles(*enabled_roles, item.subscriptions_on_roles, notifications); + ++i; + } else { - collectEnabledRoles(*elem, notifications); - ++i; + i = enabled_roles_by_params.erase(i); } } } -void RoleCache::collectEnabledRoles(EnabledRoles & enabled, scope_guard * notifications) +void RoleCache::collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsOnRoles & subscriptions_on_roles, scope_guard * notifications) { /// `mutex` is already locked. @@ -112,43 +116,57 @@ void RoleCache::collectEnabledRoles(EnabledRoles & enabled, scope_guard * notifi auto new_info = std::make_shared(); boost::container::flat_set skip_ids; - auto get_role_function = [this](const UUID & id) { return getRole(id); }; + /// We need to collect and keep not only enabled roles but also subscriptions for them to be able to recalculate EnabledRolesInfo when some of the roles change. 
+ SubscriptionsOnRoles new_subscriptions_on_roles; + new_subscriptions_on_roles.reserve(subscriptions_on_roles.size()); - for (const auto & current_role : enabled.params.current_roles) + auto get_role_function = [this, &subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, subscriptions_on_roles); }; + + for (const auto & current_role : enabled_roles.params.current_roles) collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false); - for (const auto & current_role : enabled.params.current_roles_with_admin_option) + for (const auto & current_role : enabled_roles.params.current_roles_with_admin_option) collectRoles(*new_info, skip_ids, get_role_function, current_role, true, true); + /// Remove duplicates from `subscriptions_on_roles`. + std::sort(new_subscriptions_on_roles.begin(), new_subscriptions_on_roles.end()); + new_subscriptions_on_roles.erase(std::unique(new_subscriptions_on_roles.begin(), new_subscriptions_on_roles.end()), new_subscriptions_on_roles.end()); + subscriptions_on_roles = std::move(new_subscriptions_on_roles); + /// Collect data from the collected roles. - enabled.setRolesInfo(new_info, notifications); + enabled_roles.setRolesInfo(new_info, notifications); } -RolePtr RoleCache::getRole(const UUID & role_id) +RolePtr RoleCache::getRole(const UUID & role_id, SubscriptionsOnRoles & subscriptions_on_roles) { /// `mutex` is already locked. auto role_from_cache = cache.get(role_id); if (role_from_cache) + { + subscriptions_on_roles.emplace_back(role_from_cache->second); return role_from_cache->first; + } - auto subscription = access_control.subscribeForChanges(role_id, - [this, role_id](const UUID &, const AccessEntityPtr & entity) + auto on_role_changed_or_removed = [this, role_id](const UUID &, const AccessEntityPtr & entity) { auto changed_role = entity ? 
typeid_cast(entity) : nullptr; if (changed_role) roleChanged(role_id, changed_role); else roleRemoved(role_id); - }); + }; + + auto subscription_on_role = std::make_shared(access_control.subscribeForChanges(role_id, on_role_changed_or_removed)); auto role = access_control.tryRead(role_id); if (role) { - auto cache_value = Poco::SharedPtr>( - new std::pair{role, std::move(subscription)}); + auto cache_value = Poco::SharedPtr>>( + new std::pair>{role, subscription_on_role}); cache.add(role_id, cache_value); + subscriptions_on_roles.emplace_back(subscription_on_role); return role; } @@ -162,6 +180,7 @@ void RoleCache::roleChanged(const UUID & role_id, const RolePtr & changed_role) scope_guard notifications; std::lock_guard lock{mutex}; + auto role_from_cache = cache.get(role_id); if (role_from_cache) { diff --git a/src/Access/RoleCache.h b/src/Access/RoleCache.h index 51c415d4d1d..24f19cb9d94 100644 --- a/src/Access/RoleCache.h +++ b/src/Access/RoleCache.h @@ -24,15 +24,29 @@ public: const std::vector & current_roles_with_admin_option); private: - void collectEnabledRoles(scope_guard * notifications); - void collectEnabledRoles(EnabledRoles & enabled, scope_guard * notifications); - RolePtr getRole(const UUID & role_id); + using SubscriptionsOnRoles = std::vector>; + + void collectEnabledRoles(scope_guard * notifications) TSA_REQUIRES(mutex); + void collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsOnRoles & subscriptions_on_roles, scope_guard * notifications) TSA_REQUIRES(mutex); + RolePtr getRole(const UUID & role_id, SubscriptionsOnRoles & subscriptions_on_roles) TSA_REQUIRES(mutex); void roleChanged(const UUID & role_id, const RolePtr & changed_role); void roleRemoved(const UUID & role_id); const AccessControl & access_control; - Poco::AccessExpireCache> cache; - std::map> enabled_roles; + + Poco::AccessExpireCache>> TSA_GUARDED_BY(mutex) cache; + + struct EnabledRolesWithSubscriptions + { + std::weak_ptr enabled_roles; + + /// We need to keep subscriptions for all enabled roles to be able to recalculate EnabledRolesInfo when some of the roles change. + /// `cache` also keeps subscriptions but that's not enough because values can be purged from the `cache` anytime. 
+ SubscriptionsOnRoles subscriptions_on_roles; + }; + + std::map TSA_GUARDED_BY(mutex) enabled_roles_by_params; + mutable std::mutex mutex; }; From b2b8b7f872591714e10a702c64fd58aebd0e6457 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Mar 2023 19:05:38 +0100 Subject: [PATCH 294/333] Fix RewriteArrayExistsToHasPass --- src/Analyzer/Passes/ArrayExistsToHasPass.cpp | 130 ++++++++++-------- src/Analyzer/Passes/ArrayExistsToHasPass.h | 12 +- src/Analyzer/Passes/AutoFinalOnQueryPass.cpp | 3 +- .../Passes/ShardNumColumnToFunctionPass.h | 3 + 4 files changed, 87 insertions(+), 61 deletions(-) diff --git a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp index b4b8b5b4579..c0f958588f1 100644 --- a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp +++ b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp @@ -1,3 +1,5 @@ +#include + #include #include @@ -8,71 +10,85 @@ #include #include -#include "ArrayExistsToHasPass.h" - namespace DB { + namespace { - class RewriteArrayExistsToHasVisitor : public InDepthQueryTreeVisitorWithContext + +class RewriteArrayExistsToHasVisitor : public InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + void visitImpl(QueryTreeNodePtr & node) { - public: - using Base = InDepthQueryTreeVisitorWithContext; - using Base::Base; + if (!getSettings().optimize_rewrite_array_exists_to_has) + return; - void visitImpl(QueryTreeNodePtr & node) + auto * array_exists_function_node = node->as(); + if (!array_exists_function_node || array_exists_function_node->getFunctionName() != "arrayExists") + return; + + auto & array_exists_function_arguments_nodes = array_exists_function_node->getArguments().getNodes(); + if (array_exists_function_arguments_nodes.size() != 2) + return; + + /// lambda function must be like: x -> x = elem + auto * lambda_node = array_exists_function_arguments_nodes[0]->as(); + if (!lambda_node) + return; + + auto & lambda_arguments_nodes = lambda_node->getArguments().getNodes(); + if (lambda_arguments_nodes.size() != 1) + return; + + const auto & lambda_argument_column_node = lambda_arguments_nodes[0]; + if (lambda_argument_column_node->getNodeType() != QueryTreeNodeType::COLUMN) + return; + + auto * filter_node = lambda_node->getExpression()->as(); + if (!filter_node || filter_node->getFunctionName() != "equals") + return; + + const auto & filter_arguments_nodes = filter_node->getArguments().getNodes(); + if (filter_arguments_nodes.size() != 2) + return; + + const auto & filter_lhs_argument_node = filter_arguments_nodes[0]; + auto filter_lhs_argument_node_type = filter_lhs_argument_node->getNodeType(); + + const auto & filter_rhs_argument_node = filter_arguments_nodes[1]; + auto filter_rhs_argument_node_type = filter_rhs_argument_node->getNodeType(); + + QueryTreeNodePtr has_constant_element_argument; + + if (filter_lhs_argument_node_type == QueryTreeNodeType::COLUMN && + filter_rhs_argument_node_type == QueryTreeNodeType::CONSTANT && + filter_lhs_argument_node->isEqual(*lambda_argument_column_node)) { - if (!getSettings().optimize_rewrite_array_exists_to_has) - return; - - auto * function_node = node->as(); - if (!function_node || function_node->getFunctionName() != "arrayExists") - return; - - auto & function_arguments_nodes = function_node->getArguments().getNodes(); - if (function_arguments_nodes.size() != 2) - return; - - /// lambda function must be like: x -> x = elem - auto * lambda_node = function_arguments_nodes[0]->as(); - if (!lambda_node) - 
return; - - auto & lambda_arguments_nodes = lambda_node->getArguments().getNodes(); - if (lambda_arguments_nodes.size() != 1) - return; - auto * column_node = lambda_arguments_nodes[0]->as(); - - auto * filter_node = lambda_node->getExpression()->as(); - if (!filter_node || filter_node->getFunctionName() != "equals") - return; - - auto filter_arguments_nodes = filter_node->getArguments().getNodes(); - if (filter_arguments_nodes.size() != 2) - return; - - ColumnNode * filter_column_node = nullptr; - if (filter_arguments_nodes[1]->as() && (filter_column_node = filter_arguments_nodes[0]->as()) - && filter_column_node->getColumnName() == column_node->getColumnName()) - { - /// Rewrite arrayExists(x -> x = elem, arr) -> has(arr, elem) - function_arguments_nodes[0] = std::move(function_arguments_nodes[1]); - function_arguments_nodes[1] = std::move(filter_arguments_nodes[1]); - function_node->resolveAsFunction( - FunctionFactory::instance().get("has", getContext())->build(function_node->getArgumentColumns())); - } - else if ( - filter_arguments_nodes[0]->as() && (filter_column_node = filter_arguments_nodes[1]->as()) - && filter_column_node->getColumnName() == column_node->getColumnName()) - { - /// Rewrite arrayExists(x -> elem = x, arr) -> has(arr, elem) - function_arguments_nodes[0] = std::move(function_arguments_nodes[1]); - function_arguments_nodes[1] = std::move(filter_arguments_nodes[0]); - function_node->resolveAsFunction( - FunctionFactory::instance().get("has", getContext())->build(function_node->getArgumentColumns())); - } + /// Rewrite arrayExists(x -> x = elem, arr) -> has(arr, elem) + has_constant_element_argument = filter_rhs_argument_node; } - }; + else if (filter_lhs_argument_node_type == QueryTreeNodeType::CONSTANT && + filter_rhs_argument_node_type == QueryTreeNodeType::COLUMN && + filter_rhs_argument_node->isEqual(*lambda_argument_column_node)) + { + /// Rewrite arrayExists(x -> elem = x, arr) -> has(arr, elem) + has_constant_element_argument = filter_lhs_argument_node; + } + else + { + return; + } + + auto has_function = FunctionFactory::instance().get("has", getContext()); + array_exists_function_arguments_nodes[0] = std::move(array_exists_function_arguments_nodes[1]); + array_exists_function_arguments_nodes[1] = std::move(has_constant_element_argument); + array_exists_function_node->resolveAsFunction(has_function->build(array_exists_function_node->getArgumentColumns())); + } +}; } diff --git a/src/Analyzer/Passes/ArrayExistsToHasPass.h b/src/Analyzer/Passes/ArrayExistsToHasPass.h index 7d9d1cf3d68..8f4623116e3 100644 --- a/src/Analyzer/Passes/ArrayExistsToHasPass.h +++ b/src/Analyzer/Passes/ArrayExistsToHasPass.h @@ -4,8 +4,15 @@ namespace DB { -/// Rewrite possible 'arrayExists(func, arr)' to 'has(arr, elem)' to improve performance -/// arrayExists(x -> x = 1, arr) -> has(arr, 1) + +/** Rewrite possible 'arrayExists(func, arr)' to 'has(arr, elem)' to improve performance. 
+ * + * Example: SELECT arrayExists(x -> x = 1, arr); + * Result: SELECT has(arr, 1); + * + * Example: SELECT arrayExists(x -> 1 = x, arr); + * Result: SELECT has(arr, 1); + */ class RewriteArrayExistsToHasPass final : public IQueryTreePass { public: @@ -15,4 +22,5 @@ public: void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; }; + } diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp index fdf818681d7..fa5fc0e75a8 100644 --- a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp +++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp @@ -22,8 +22,7 @@ public: void visitImpl(QueryTreeNodePtr & node) { - const auto & context = getContext(); - if (!context->getSettingsRef().final) + if (!getSettings().final) return; const auto * query_node = node->as(); diff --git a/src/Analyzer/Passes/ShardNumColumnToFunctionPass.h b/src/Analyzer/Passes/ShardNumColumnToFunctionPass.h index 83b974954fa..71a038bcf39 100644 --- a/src/Analyzer/Passes/ShardNumColumnToFunctionPass.h +++ b/src/Analyzer/Passes/ShardNumColumnToFunctionPass.h @@ -6,6 +6,9 @@ namespace DB { /** Rewrite _shard_num column into shardNum() function. + * + * Example: SELECT _shard_num FROM distributed_table; + * Result: SELECT shardNum() FROM distributed_table; */ class ShardNumColumnToFunctionPass final : public IQueryTreePass { From 20c0476d2fb9cc4964b8075fdc8507d6f3c467b3 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Wed, 8 Mar 2023 10:51:16 +0100 Subject: [PATCH 295/333] Update docs/en/operations/server-configuration-parameters/settings.md Co-authored-by: pufit --- docs/en/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 92a8270ac0f..9d8f3ba8355 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -989,7 +989,7 @@ Default value: `100`. ## max_io_thread_pool_free_size {#max-io-thread-pool-free-size} -If the number of **idle** threads in the IO Thread pool is greater than `max_io_thread_pool_free_size`, then ClickHouse releases resources occupied by some threads and the pool size is decreased. Threads can be created again if necessary. +If the number of **idle** threads in the IO Thread pool exceeds `max_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary. Possible values: From 880b34fded1b8a3cee8b690309b11c9903af091a Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Wed, 8 Mar 2023 10:51:34 +0100 Subject: [PATCH 296/333] Update docs/en/operations/server-configuration-parameters/settings.md Co-authored-by: pufit --- docs/en/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 9d8f3ba8355..3fe815bc79a 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1020,7 +1020,7 @@ Default value: `1000`. 
## max_backups_io_thread_pool_free_size {#max-backups-io-thread-pool-free-size} -If the number of **idle** threads in the Backups IO Thread pool is greater than `max_backup_io_thread_pool_free_size`, then ClickHouse releases resources occupied by some threads and the pool size is decreased. Threads can be created again if necessary. +If the number of **idle** threads in the Backups IO Thread pool exceeds `max_backup_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary. Possible values: From b6f3b81403dd8051be2fc511f4ec87023ef1aca2 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 6 Mar 2023 12:50:09 +0100 Subject: [PATCH 297/333] Query plan filter push down optimization filled JOIN fix --- src/Processors/QueryPlan/JoinStep.h | 2 ++ .../QueryPlan/Optimizations/filterPushDown.cpp | 13 ++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h index a814d541574..e7185f36588 100644 --- a/src/Processors/QueryPlan/JoinStep.h +++ b/src/Processors/QueryPlan/JoinStep.h @@ -49,6 +49,8 @@ public: String getName() const override { return "FilledJoin"; } void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + const JoinPtr & getJoin() const { return join; } + private: void updateOutputStream() override; diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index d466c52725f..37bc894339f 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -314,11 +314,14 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (auto updated_steps = simplePushDownOverStep(parent_node, nodes, child)) return updated_steps; - if (auto * join = typeid_cast(child.get())) + auto * join = typeid_cast(child.get()); + auto * filled_join = typeid_cast(child.get()); + + if (join || filled_join) { auto join_push_down = [&](JoinKind kind) -> size_t { - const auto & table_join = join->getJoin()->getTableJoin(); + const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin(); /// Only inner and left(/right) join are supported. Other types may generate default values for left table keys. /// So, if we push down a condition like `key != 0`, not all rows may be filtered. @@ -326,8 +329,8 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes return 0; bool is_left = kind == JoinKind::Left; - const auto & input_header = is_left ? join->getInputStreams().front().header : join->getInputStreams().back().header; - const auto & res_header = join->getOutputStream().header; + const auto & input_header = is_left ? child->getInputStreams().front().header : child->getInputStreams().back().header; + const auto & res_header = child->getOutputStream().header; Names allowed_keys; const auto & source_columns = input_header.getNames(); for (const auto & name : source_columns) @@ -372,7 +375,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes return updated_steps; /// For full sorting merge join we push down both to the left and right tables, because left and right streams are not independent. 
- if (join->allowPushDownToRight()) + if (join && join->allowPushDownToRight()) { if (size_t updated_steps = join_push_down(JoinKind::Right)) return updated_steps; From 3aed5a4ab455ce1c6600a5a011381432e5543ba4 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 7 Mar 2023 12:35:17 +0100 Subject: [PATCH 298/333] Added tests --- ...dicate_push_down_filled_join_fix.reference | 33 +++++++++++++++++++ ...75_predicate_push_down_filled_join_fix.sql | 26 +++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference create mode 100644 tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.sql diff --git a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference new file mode 100644 index 00000000000..ecdb62c5cb5 --- /dev/null +++ b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference @@ -0,0 +1,33 @@ +Expression ((Project names + (Projection + ))) +Header: t1.id UInt64 + t1.value String + t2.value String +Actions: INPUT : 0 -> t1.id_0 UInt64 : 0 + INPUT : 1 -> t1.value_1 String : 1 + INPUT : 2 -> t2.value_2 String : 2 + ALIAS t1.id_0 :: 0 -> t1.id UInt64 : 3 + ALIAS t1.value_1 :: 1 -> t1.value String : 0 + ALIAS t2.value_2 :: 2 -> t2.value String : 1 +Positions: 3 0 1 + FilledJoin (Filled JOIN) + Header: t1.id_0 UInt64 + t1.value_1 String + t2.value_2 String + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: t1.id_0 UInt64 + t1.value_1 String + Filter column: equals(t1.id_0, 0_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 0_UInt8 UInt8 : 2 + ALIAS id :: 0 -> t1.id_0 UInt64 : 3 + ALIAS value :: 1 -> t1.value_1 String : 0 + FUNCTION equals(t1.id_0 : 3, 0_UInt8 :: 2) -> equals(t1.id_0, 0_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +0 Value JoinValue diff --git a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.sql b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.sql new file mode 100644 index 00000000000..78cb423216b --- /dev/null +++ b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.sql @@ -0,0 +1,26 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, 'Value'); + +DROP TABLE IF EXISTS test_table_join; +CREATE TABLE test_table_join +( + id UInt64, + value String +) ENGINE = Join(All, inner, id); + +INSERT INTO test_table_join VALUES (0, 'JoinValue'); + +EXPLAIN header = 1, actions = 1 SELECT t1.id, t1.value, t2.value FROM test_table AS t1 INNER JOIN test_table_join AS t2 ON t1.id = t2.id WHERE t1.id = 0; + +SELECT t1.id, t1.value, t2.value FROM test_table AS t1 INNER JOIN test_table_join AS t2 ON t1.id = t2.id WHERE t1.id = 0; + +DROP TABLE test_table_join; +DROP TABLE test_table; From ff6c9916e6e511c333b33d0ece1607897a2aa75e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 8 Mar 2023 12:13:28 +0100 Subject: [PATCH 299/333] Improvements based on the PR --- src/Client/ClientBase.cpp | 16 ++++++++-------- src/Client/TestHint.cpp | 8 ++++---- src/Client/TestHint.h | 23 +++++++++++++---------- 3 files changed, 25 insertions(+), 22 
deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index b5c662b4a80..4476678f0f4 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1834,7 +1834,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) { /// disable logs if expects errors TestHint test_hint(all_queries_text); - if (!test_hint.clientErrors().empty() || !test_hint.serverErrors().empty()) + if (test_hint.hasClientErrors() || test_hint.hasServerErrors()) processTextAsSingleQuery("SET send_logs_level = 'fatal'"); } @@ -1876,7 +1876,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) // the query ends because we failed to parse it, so we consume // the entire line. TestHint hint(String(this_query_begin, this_query_end - this_query_begin)); - if (!hint.serverErrors().empty()) + if (hint.hasServerErrors()) { // Syntax errors are considered as client errors current_exception->addMessage("\nExpected server error: {}.", hint.serverErrors()); @@ -1886,7 +1886,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) if (std::find(hint.clientErrors().begin(), hint.clientErrors().end(), current_exception->code()) == hint.clientErrors().end()) { - if (!hint.clientErrors().empty()) + if (hint.hasClientErrors()) current_exception->addMessage("\nExpected client error: {}.", hint.clientErrors()); current_exception->rethrow(); @@ -1936,7 +1936,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) bool error_matches_hint = true; if (have_error) { - if (!test_hint.serverErrors().empty()) + if (test_hint.hasServerErrors()) { if (!server_exception) { @@ -1953,7 +1953,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) test_hint.serverErrors(), server_exception->code(), full_query); } } - if (!test_hint.clientErrors().empty()) + if (test_hint.hasClientErrors()) { if (!client_exception) { @@ -1970,7 +1970,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) test_hint.clientErrors(), client_exception->code(), full_query); } } - if (test_hint.clientErrors().empty() && test_hint.serverErrors().empty()) + if (!test_hint.hasClientErrors() && !test_hint.hasServerErrors()) { // No error was expected but it still occurred. 
This is the // default case without test hint, doesn't need additional @@ -1980,14 +1980,14 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) } else { - if (!test_hint.clientErrors().empty()) + if (test_hint.hasClientErrors()) { error_matches_hint = false; fmt::print(stderr, "The query succeeded but the client error '{}' was expected (query: {}).\n", test_hint.clientErrors(), full_query); } - if (!test_hint.serverErrors().empty()) + if (test_hint.hasServerErrors()) { error_matches_hint = false; fmt::print(stderr, diff --git a/src/Client/TestHint.cpp b/src/Client/TestHint.cpp index 1500a8baf26..8cc91250721 100644 --- a/src/Client/TestHint.cpp +++ b/src/Client/TestHint.cpp @@ -9,7 +9,7 @@ namespace DB::ErrorCodes { -extern const int CANNOT_PARSE_TEXT; + extern const int CANNOT_PARSE_TEXT; } namespace DB @@ -54,9 +54,9 @@ TestHint::TestHint(const String & query_) void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) { - std::unordered_set commands{"echo", "echoOn", "echoOff"}; + std::unordered_set commands{"echo", "echoOn", "echoOff"}; - std::unordered_set command_errors{ + std::unordered_set command_errors{ "serverError", "clientError", }; @@ -76,7 +76,7 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) else if (!is_leading_hint && token.type == TokenType::BareWord && command_errors.contains(item)) { /// Everything after this must be a list of errors separated by comma - error_vector error_codes; + ErrorVector error_codes; while (!token.isEnd()) { token = comment_lexer.nextToken(); diff --git a/src/Client/TestHint.h b/src/Client/TestHint.h index d9650b9d4d7..fb1dbfb72dc 100644 --- a/src/Client/TestHint.h +++ b/src/Client/TestHint.h @@ -18,7 +18,7 @@ class Lexer; /// The following comment hints are supported: /// /// - "-- { serverError 60 }" -- in case of you are expecting server error. -/// - "-- { serverError 16, 36 }" -- in case of you are expecting one of the 2 errors +/// - "-- { serverError 16, 36 }" -- in case of you are expecting one of the 2 errors. /// /// - "-- { clientError 20 }" -- in case of you are expecting client error. /// - "-- { clientError 20, 60, 92 }" -- It's expected that the client will return one of the 3 errors. 
@@ -52,17 +52,20 @@ class Lexer; class TestHint { public: - using error_vector = std::vector; + using ErrorVector = std::vector; TestHint(const String & query_); const auto & serverErrors() const { return server_errors; } const auto & clientErrors() const { return client_errors; } std::optional echoQueries() const { return echo; } + bool hasClientErrors() { return !client_errors.empty(); } + bool hasServerErrors() { return !server_errors.empty(); } + private: const String & query; - error_vector server_errors{}; - error_vector client_errors{}; + ErrorVector server_errors{}; + ErrorVector client_errors{}; std::optional echo; void parse(Lexer & comment_lexer, bool is_leading_hint); @@ -91,7 +94,7 @@ private: } template <> -struct fmt::formatter +struct fmt::formatter { static constexpr auto parse(format_parse_context & ctx) { @@ -106,13 +109,13 @@ struct fmt::formatter } template - auto format(const DB::TestHint::error_vector & error_vector, FormatContext & ctx) + auto format(const DB::TestHint::ErrorVector & ErrorVector, FormatContext & ctx) { - if (error_vector.empty()) + if (ErrorVector.empty()) return format_to(ctx.out(), "{}", 0); - else if (error_vector.size() == 1) - return format_to(ctx.out(), "{}", error_vector[0]); + else if (ErrorVector.size() == 1) + return format_to(ctx.out(), "{}", ErrorVector[0]); else - return format_to(ctx.out(), "One of [{}]", fmt::join(error_vector, ", ")); + return format_to(ctx.out(), "[{}]", fmt::join(ErrorVector, ", ")); } }; From 969bb0e1afc8f60699b46fcbef29e69f29a211f5 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 8 Mar 2023 12:31:55 +0000 Subject: [PATCH 300/333] Remove test 02153_native_bounds_check.sh --- .../0_stateless/02153_native_bounds_check.reference | 1 - .../queries/0_stateless/02153_native_bounds_check.sh | 11 ----------- 2 files changed, 12 deletions(-) delete mode 100644 tests/queries/0_stateless/02153_native_bounds_check.reference delete mode 100755 tests/queries/0_stateless/02153_native_bounds_check.sh diff --git a/tests/queries/0_stateless/02153_native_bounds_check.reference b/tests/queries/0_stateless/02153_native_bounds_check.reference deleted file mode 100644 index d00491fd7e5..00000000000 --- a/tests/queries/0_stateless/02153_native_bounds_check.reference +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/tests/queries/0_stateless/02153_native_bounds_check.sh b/tests/queries/0_stateless/02153_native_bounds_check.sh deleted file mode 100755 index a3475ddacae..00000000000 --- a/tests/queries/0_stateless/02153_native_bounds_check.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Should correctly handle error. 
- -${CLICKHOUSE_LOCAL} --query "SELECT toString(number) AS a, toString(number) AS a FROM numbers(10)" --output-format Native | - ${CLICKHOUSE_LOCAL} --query "SELECT * FROM table" --input-format Native --structure 'a LowCardinality(String)' 2>&1 | - grep -c -F Exception From 79ef95bc0c2c2e983cdbb90afc75177aa8597430 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 8 Mar 2023 12:54:22 +0000 Subject: [PATCH 301/333] Fix tests and builds --- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 2 +- .../01308_orc_output_format_arrays.reference | Bin 567 -> 36 bytes .../01308_orc_output_format_arrays.sh | 2 +- .../queries/0_stateless/02426_orc_bug.reference | Bin 312 -> 36 bytes tests/queries/0_stateless/02426_orc_bug.sh | 9 +++++++++ tests/queries/0_stateless/02426_orc_bug.sql | 3 --- 6 files changed, 11 insertions(+), 5 deletions(-) create mode 100755 tests/queries/0_stateless/02426_orc_bug.sh delete mode 100644 tests/queries/0_stateless/02426_orc_bug.sql diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index cedd8a9c54c..759f773a574 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes namespace { -static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & settings) +parquet::ParquetVersion::type getParquetVersion(const FormatSettings & settings) { switch (settings.parquet.output_version) { diff --git a/tests/queries/0_stateless/01308_orc_output_format_arrays.reference b/tests/queries/0_stateless/01308_orc_output_format_arrays.reference index 1f9646ac112132378f512bb4e3a610f6019698e1..7feea7cec35600166ea51a6950459c05eb6c7d88 100644 GIT binary patch literal 36 rcmXpqH%qZFO-ZpdOEI=INij9EFfubvv`jTIH%c@%Pfk%#(B%REtfL6? 
literal 567 zcmaKou}Z{15Qb-Fv+J_CUfj1(^yJ73Txpxm?(hk$eF1A9K)8pp z5ER#OZ*wFHHuLYd|I9G_i{;#AZq?1nOH zi@h-PB)k2_Z@x78PeSsIycPNP4-!J}r2;h0Sd#;1L6P@^5+4Osk&~#JS^h!#DJTY| z99adXz?c*3z$*GG`bp%3P4IQ>*K8yV+53Mj)#Y{L&fPY*Z5^i6IX{^Uqp&J}r7{({ zSCX4ST<`_rgrW2m5pNorkz6v0~6|4#a#fA`ch4R^#kxNzslh121M bv&Wk&w9hE%uGh(gAgwNH^YEtBVmaq;55+tO diff --git a/tests/queries/0_stateless/01308_orc_output_format_arrays.sh b/tests/queries/0_stateless/01308_orc_output_format_arrays.sh index 1d9aea353b6..498854874cf 100755 --- a/tests/queries/0_stateless/01308_orc_output_format_arrays.sh +++ b/tests/queries/0_stateless/01308_orc_output_format_arrays.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE orc (array1 Array(Int32), array2 Array( $CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES ([1,2,3,4,5], [[1,2], [3,4], [5]]), ([42], [[42, 42], [42]])"; -$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC SETTINGS output_format_orc_compression_method='none'" | md5sum; $CLICKHOUSE_CLIENT --query="DROP TABLE orc"; diff --git a/tests/queries/0_stateless/02426_orc_bug.reference b/tests/queries/0_stateless/02426_orc_bug.reference index e5ad2b492892318e8fa83954efdecbb2bf287a91..baa88da215881130acc288c7ce2a15bd34f45e90 100644 GIT binary patch literal 36 rcmcCawlFudut-WXGfYc0H%zs(G)*xwOi4*gGB-9%F*jFG(B%REwulK^ literal 312 zcmeYda^~XZVrF1q5MtqA3Si*ke3ufJp+V>i>cN2mUcI{%`ot z$i>FNCcq%U3#J(*_`x)jggB690E#n#X+|*3B)}-a4CVt30n&^h!+_QT)ghBo% Date: Wed, 8 Mar 2023 13:06:10 +0000 Subject: [PATCH 302/333] Add logs --- src/Coordination/Changelog.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index ddedae4fa0f..13da4dffac8 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -211,13 +211,20 @@ public: void flush() { auto * file_buffer = tryGetFileBuffer(); + LOG_TRACE(log, "Trying to flush"); if (file_buffer) { /// Fsync file system if needed if (log_file_settings.force_sync) + { file_buffer->sync(); + } else + { + LOG_TRACE(log, "Flushing with next, in buffer {}", file_buffer->offset()); file_buffer->next(); + LOG_TRACE(log, "Flushed in total {}", file_buffer->count()); + } } } From 07a39887499c7eadb4198a724d024c98607065d3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 8 Mar 2023 08:26:07 -0500 Subject: [PATCH 303/333] geteuid is enough --- utils/self-extracting-executable/decompressor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp index 1374c269dc1..d41b9b1ebe1 100644 --- a/utils/self-extracting-executable/decompressor.cpp +++ b/utils/self-extracting-executable/decompressor.cpp @@ -170,7 +170,7 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n bool isSudo() { - return geteuid() == 0 && getenv("SUDO_USER") && getenv("SUDO_UID") && getenv("SUDO_GID"); // NOLINT(concurrency-mt-unsafe) + return geteuid() == 0; } /// Read data about files and decomrpess them. 
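
The `isSudo()` change above makes the privilege check depend solely on the effective UID, no longer consulting the `SUDO_USER`/`SUDO_UID`/`SUDO_GID` environment variables. A minimal standalone sketch of the same check, for illustration only — the helper name and the `main` wrapper are not part of the patch series:

```cpp
#include <unistd.h>  // geteuid
#include <cstdio>

// Illustrative sketch mirroring the simplified check from the patch above:
// the process counts as privileged when its effective UID is root (0);
// SUDO_* environment variables are intentionally not consulted.
static bool isRunningAsRoot()
{
    return geteuid() == 0;
}

int main()
{
    std::printf("running as root: %s\n", isRunningAsRoot() ? "yes" : "no");
    return 0;
}
```
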
From df698355dff9d3b364cbbff50c6e84b5192236cc Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 8 Mar 2023 14:25:24 +0000 Subject: [PATCH 304/333] comment --- src/Analyzer/QueryTreeBuilder.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index ed956071d5a..34a75dd0c33 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -356,6 +356,9 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context); /// Combine limit expression with limit and offset settings into final limit expression + /// The sequence of application is next - offset expression, limit expression, offset setting, limit setting. + /// Since offset setting is applied after limit expression, but we want to transfer settings into expression + /// we must decrease limit expression by offset setting and then add offset setting to offset expression. /// select_limit - limit expression /// limit - limit setting /// offset - offset setting From 0fb537e8805aa54f1d951bd1b5a58d28b92c69c5 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 8 Mar 2023 15:27:43 +0100 Subject: [PATCH 305/333] suppress expected errors from test 01111 in Upgrade check --- docker/test/upgrade/run.sh | 4 +++- src/Interpreters/DDLWorker.cpp | 12 ++++++------ .../01111_create_drop_replicated_db_stress.sh | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index ce8a56c777e..93f07bbb551 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -161,7 +161,9 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "Authentication failed" \ -e "Cannot flush" \ -e "Container already exists" \ - /var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "" > /test_output/upgrade_error_messages.txt \ + clickhouse-server.upgrade.log \ + | grep -av -e "_repl_01111_.*Mapping for table with UUID" \ + | zgrep -Fa "" > /test_output/upgrade_error_messages.txt \ && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \ >> /test_output/test_results.tsv \ || echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 7179ce94e0b..537380862fa 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -169,11 +169,11 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r return {}; } - auto write_error_status = [&](const String & host_id, const String & error_message, const String & reason) + auto write_error_status = [&](const String & host_id, const ExecutionStatus & status, const String & reason) { - LOG_ERROR(log, "Cannot parse DDL task {}: {}. Will try to send error status: {}", entry_name, reason, error_message); + LOG_ERROR(log, "Cannot parse DDL task {}: {}. 
Will try to send error status: {}", entry_name, reason, status.message); createStatusDirs(entry_path, zookeeper); - zookeeper->tryCreate(fs::path(entry_path) / "finished" / host_id, error_message, zkutil::CreateMode::Persistent); + zookeeper->tryCreate(fs::path(entry_path) / "finished" / host_id, status.serializeText(), zkutil::CreateMode::Persistent); }; try @@ -187,7 +187,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r /// We can try to create fail node using FQDN if it equal to host name in cluster config attempt will be successful. /// Otherwise, that node will be ignored by DDLQueryStatusSource. out_reason = "Incorrect task format"; - write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException().serializeText(), out_reason); + write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException(), out_reason); return {}; } @@ -212,7 +212,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r catch (...) { out_reason = "Cannot parse query or obtain cluster info"; - write_error_status(task->host_id_str, ExecutionStatus::fromCurrentException().serializeText(), out_reason); + write_error_status(task->host_id_str, ExecutionStatus::fromCurrentException(), out_reason); return {}; } @@ -650,7 +650,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) bool status_written_by_table_or_db = task.ops.empty(); if (status_written_by_table_or_db) { - throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.serializeText()); + throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.message); } else { diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 8c9efb75e96..770a0780ca2 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -15,7 +15,7 @@ function create_db() # Multiple database replicas on one server are actually not supported (until we have namespaces). # So CREATE TABLE queries will fail on all replicas except one. But it's still makes sense for a stress test. 
$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 --query \ - "create database if not exists ${CLICKHOUSE_DATABASE}_repl_$SUFFIX engine=Replicated('/test/01111/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '$SHARD', '$REPLICA')" \ + "create database if not exists ${CLICKHOUSE_DATABASE}_repl_01111_$SUFFIX engine=Replicated('/test/01111/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '$SHARD', '$REPLICA')" \ 2>&1| grep -Fa "Exception: " | grep -Fv "REPLICA_ALREADY_EXISTS" | grep -Fiv "Will not try to start it up" | \ grep -Fv "Coordination::Exception" | grep -Fv "already contains some data and it does not look like Replicated database path" sleep 0.$RANDOM From debd69f03af5939b266c713c9ff9deb1884d3691 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 7 Mar 2023 20:17:09 +0100 Subject: [PATCH 306/333] fix an assertion with implicit transactions in interserver mode --- src/Interpreters/executeQuery.cpp | 60 ++++++++----------- .../02345_implicit_transaction.reference | 3 + .../02345_implicit_transaction.sql | 12 +++- 3 files changed, 38 insertions(+), 37 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index e2aa2c02fc8..fb90ba2da7b 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -451,10 +451,24 @@ static std::tuple executeQueryImpl( /// Avoid early destruction of process_list_entry if it was not saved to `res` yet (in case of exception) ProcessList::EntryPtr process_list_entry; BlockIO res; - std::shared_ptr implicit_txn_control{}; + auto implicit_txn_control = std::make_shared(false); String query_database; String query_table; + auto execute_implicit_tcl_query = [implicit_txn_control](const ContextMutablePtr & query_context, ASTTransactionControl::QueryType tcl_type) + { + /// Unset the flag on COMMIT and ROLLBACK + SCOPE_EXIT({ if (tcl_type != ASTTransactionControl::BEGIN) *implicit_txn_control = false; }); + + ASTPtr tcl_ast = std::make_shared(tcl_type); + InterpreterTransactionControlQuery tc(tcl_ast, query_context); + tc.execute(); + + /// Set the flag after successful BIGIN + if (tcl_type == ASTTransactionControl::BEGIN) + *implicit_txn_control = true; + }; + try { if (auto txn = context->getCurrentTransaction()) @@ -674,14 +688,7 @@ static std::tuple executeQueryImpl( if (context->isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot create transactions"); - /// If there is no session (which is the default for the HTTP Handler), set up one just for this as it is necessary - /// to control the transaction lifetime - if (!context->hasSessionContext()) - context->makeSessionContext(); - - auto tc = std::make_shared(ast, context); - tc->executeBegin(context->getSessionContext()); - implicit_txn_control = std::move(tc); + execute_implicit_tcl_query(context, ASTTransactionControl::BEGIN); } catch (Exception & e) { @@ -949,6 +956,7 @@ static std::tuple executeQueryImpl( log_processors_profiles = settings.log_processors_profiles, status_info_to_query_log, implicit_txn_control, + execute_implicit_tcl_query, pulling_pipeline = pipeline.pulling(), query_span](QueryPipeline & query_pipeline) mutable { @@ -1062,21 +1070,8 @@ static std::tuple executeQueryImpl( } } - if (implicit_txn_control) - { - try - { - implicit_txn_control->executeCommit(context->getSessionContext()); - implicit_txn_control.reset(); - } - catch (const Exception &) - { - /// An exception might happen when trying to commit the transaction. 
For example we might get an immediate exception - /// because ZK is down and wait_changes_become_visible_after_commit_mode == WAIT_UNKNOWN - implicit_txn_control.reset(); - throw; - } - } + if (*implicit_txn_control) + execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT); } if (query_span) @@ -1104,13 +1099,11 @@ static std::tuple executeQueryImpl( quota(quota), status_info_to_query_log, implicit_txn_control, + execute_implicit_tcl_query, query_span](bool log_error) mutable { - if (implicit_txn_control) - { - implicit_txn_control->executeRollback(context->getSessionContext()); - implicit_txn_control.reset(); - } + if (*implicit_txn_control) + execute_implicit_tcl_query(context, ASTTransactionControl::ROLLBACK); else if (auto txn = context->getCurrentTransaction()) txn->onException(); @@ -1179,15 +1172,10 @@ static std::tuple executeQueryImpl( } catch (...) { - if (implicit_txn_control) - { - implicit_txn_control->executeRollback(context->getSessionContext()); - implicit_txn_control.reset(); - } + if (*implicit_txn_control) + execute_implicit_tcl_query(context, ASTTransactionControl::ROLLBACK); else if (auto txn = context->getCurrentTransaction()) - { txn->onException(); - } if (!internal) onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); diff --git a/tests/queries/0_stateless/02345_implicit_transaction.reference b/tests/queries/0_stateless/02345_implicit_transaction.reference index e4dd35600f7..fb4254ec5a7 100644 --- a/tests/queries/0_stateless/02345_implicit_transaction.reference +++ b/tests/queries/0_stateless/02345_implicit_transaction.reference @@ -12,3 +12,6 @@ in_transaction 10000 out_transaction 0 {"'implicit_True'":"implicit_True","all":"2","is_empty":0} {"'implicit_False'":"implicit_False","all":"2","is_empty":1} +0 +0 +0 diff --git a/tests/queries/0_stateless/02345_implicit_transaction.sql b/tests/queries/0_stateless/02345_implicit_transaction.sql index e3f9cca37d1..b0cb4ab6305 100644 --- a/tests/queries/0_stateless/02345_implicit_transaction.sql +++ b/tests/queries/0_stateless/02345_implicit_transaction.sql @@ -1,4 +1,4 @@ --- Tags: no-ordinary-database +-- Tags: no-ordinary-database, no-fasttest CREATE TABLE landing (n Int64) engine=MergeTree order by n; CREATE TABLE target (n Int64) engine=MergeTree order by n; @@ -92,3 +92,13 @@ WHERE query LIKE '-- Verify that the transaction_id column is NOT populated without transaction%' GROUP BY transaction_id FORMAT JSONEachRow; + +SET implicit_transaction=1; +SET throw_on_unsupported_query_inside_transaction=1; +SELECT * FROM system.one; +SELECT * FROM cluster('test_cluster_interserver_secret', system, one); -- { serverError NOT_IMPLEMENTED } +SELECT * FROM cluster('test_cluster_two_shards', system, one); -- { serverError NOT_IMPLEMENTED } +SET throw_on_unsupported_query_inside_transaction=0; +-- there's not session in the interserver mode +SELECT * FROM cluster('test_cluster_interserver_secret', system, one) FORMAT Null; -- { serverError INVALID_TRANSACTION } +SELECT * FROM cluster('test_cluster_two_shards', system, one); From 95beedc8a4bb3fc8bd33318b825d3718bdff945c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 8 Mar 2023 15:56:13 +0000 Subject: [PATCH 307/333] Fix 02343_group_by_use_nulls test --- tests/queries/0_stateless/02343_group_by_use_nulls.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02343_group_by_use_nulls.sql b/tests/queries/0_stateless/02343_group_by_use_nulls.sql index a979a78be0d..e1d4021a943 100644 --- 
a/tests/queries/0_stateless/02343_group_by_use_nulls.sql +++ b/tests/queries/0_stateless/02343_group_by_use_nulls.sql @@ -1,3 +1,4 @@ +set optimize_group_by_function_keys=0; -- { echoOn } SELECT number, number % 2, sum(number) AS val FROM numbers(10) From 52767ea186da08f8a48f5ee2941c1a86d20b0521 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 8 Mar 2023 19:25:08 +0300 Subject: [PATCH 308/333] Update 02368_cancel_write_into_hdfs.sh --- tests/queries/0_stateless/02368_cancel_write_into_hdfs.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02368_cancel_write_into_hdfs.sh b/tests/queries/0_stateless/02368_cancel_write_into_hdfs.sh index 8262cd7eab5..65d0b3f434f 100755 --- a/tests/queries/0_stateless/02368_cancel_write_into_hdfs.sh +++ b/tests/queries/0_stateless/02368_cancel_write_into_hdfs.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-stress +# Tags: no-fasttest, no-asan, no-tsan, no-msan, no-ubsan, no-debug +# FIXME https://github.com/ClickHouse/ClickHouse/issues/47207 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From c3b9af96c460077d364c7d8e95e73340456d6281 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 8 Mar 2023 17:19:04 +0000 Subject: [PATCH 309/333] fix ALTER CLEAR COLUMN with sparse columns --- src/Storages/MergeTree/MergeTask.cpp | 40 ++++++++++++++++++- src/Storages/MergeTree/MutateTask.cpp | 6 +++ ...2675_sparse_columns_clear_column.reference | 6 +++ .../02675_sparse_columns_clear_column.sql | 34 ++++++++++++++++ 4 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02675_sparse_columns_clear_column.reference create mode 100644 tests/queries/0_stateless/02675_sparse_columns_clear_column.sql diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index b961b70428e..9d9d8420e2c 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -96,6 +96,32 @@ static void extractMergingAndGatheringColumns( } } +static void addMissedColumnsToSerializationInfos( + size_t num_rows_in_parts, + const Names & part_columns, + const ColumnsDescription & storage_columns, + const SerializationInfo::Settings & info_settings, + SerializationInfoByName & new_infos) +{ + NameSet part_columns_set(part_columns.begin(), part_columns.end()); + + for (const auto & column : storage_columns) + { + if (part_columns_set.contains(column.name)) + continue; + + if (column.default_desc.kind != ColumnDefaultKind::Default) + continue; + + if (column.default_desc.expression) + continue; + + auto new_info = column.type->createSerializationInfo(info_settings); + new_info->addDefaults(num_rows_in_parts); + new_infos.emplace(column.name, std::move(new_info)); + } +} + bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() { @@ -205,7 +231,19 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->force_ttl = true; } - infos.add(part->getSerializationInfos()); + if (!info_settings.isAlwaysDefault()) + { + auto part_infos = part->getSerializationInfos(); + + addMissedColumnsToSerializationInfos( + part->rows_count, + part->getColumns().getNames(), + global_ctx->metadata_snapshot->getColumns(), + info_settings, + part_infos); + + infos.add(part_infos); + } } global_ctx->new_data_part->setColumns(global_ctx->storage_columns, infos); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index bcb1d5d2c28..526f869a3ac 100644 --- 
a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -626,6 +626,12 @@ static NameToNameVector collectFilesForRenames( } } + if (!source_part->getSerializationInfos().empty() + && new_part->getSerializationInfos().empty()) + { + rename_vector.emplace_back(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, ""); + } + return rename_vector; } diff --git a/tests/queries/0_stateless/02675_sparse_columns_clear_column.reference b/tests/queries/0_stateless/02675_sparse_columns_clear_column.reference new file mode 100644 index 00000000000..56fa4a9ebea --- /dev/null +++ b/tests/queries/0_stateless/02675_sparse_columns_clear_column.reference @@ -0,0 +1,6 @@ +arr Default +v Sparse +arr Default +arr Default +v Sparse +0 [] diff --git a/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql b/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql new file mode 100644 index 00000000000..781030ef7b4 --- /dev/null +++ b/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS t_sparse_columns_clear; + +CREATE TABLE t_sparse_columns_clear (arr Array(UInt64), v UInt64) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS + ratio_of_defaults_for_sparse_serialization = 0.9, + min_bytes_for_wide_part=0; + +INSERT INTO t_sparse_columns_clear SELECT [number], 0 FROM numbers(1000); + +SELECT column, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_columns_clear' AND active +ORDER BY column; + +SET mutations_sync = 2; +SET alter_sync = 2; + +ALTER TABLE t_sparse_columns_clear CLEAR COLUMN v; + +SELECT column, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_columns_clear' AND active +ORDER BY column; + +OPTIMIZE TABLE t_sparse_columns_clear FINAL; + +SELECT column, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_columns_clear' AND active +ORDER BY column; + +DROP TABLE t_sparse_columns_clear SYNC; + +SYSTEM FLUSH LOGS; + +SELECT count(), groupArray(message) FROM system.text_log WHERE logger_name LIKE '%' || currentDatabase() || '.t_sparse_columns_clear' || '%' AND level = 'Error'; From 86afb8a72d7143c6662cdcdf2beed79c2c2315f6 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 9 Mar 2023 02:24:37 +0000 Subject: [PATCH 310/333] shortcut for trivial case, fix test, fix comment --- src/Analyzer/QueryTreeBuilder.cpp | 52 +++++++++++-------- .../02565_analyzer_limit_settings.reference | 1 - 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 34a75dd0c33..7dd988619ac 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -356,7 +356,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context); /// Combine limit expression with limit and offset settings into final limit expression - /// The sequence of application is next - offset expression, limit expression, offset setting, limit setting. + /// The sequence of application is the following - offset expression, limit expression, offset setting, limit setting. /// Since offset setting is applied after limit expression, but we want to transfer settings into expression /// we must decrease limit expression by offset setting and then add offset setting to offset expression. 
/// select_limit - limit expression @@ -377,32 +377,40 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q auto select_limit = select_query_typed.limitLength(); if (select_limit) { - /// expr 3 - auto expr_3 = std::make_shared("minus"); - expr_3->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); - expr_3->getArguments().getNodes().push_back(std::make_shared(offset)); + /// Shortcut + if (offset == 0 && limit == 0) + { + current_query_tree->getLimit() = buildExpression(select_limit, current_context); + } + else + { + /// expr 3 + auto expr_3 = std::make_shared("minus"); + expr_3->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); + expr_3->getArguments().getNodes().push_back(std::make_shared(offset)); - /// expr 2 - auto expr_2 = std::make_shared("least"); - expr_2->getArguments().getNodes().push_back(expr_3->clone()); - expr_2->getArguments().getNodes().push_back(std::make_shared(limit)); + /// expr 2 + auto expr_2 = std::make_shared("least"); + expr_2->getArguments().getNodes().push_back(expr_3->clone()); + expr_2->getArguments().getNodes().push_back(std::make_shared(limit)); - /// expr 0 - auto expr_0 = std::make_shared("greaterOrEquals"); - expr_0->getArguments().getNodes().push_back(std::make_shared(offset)); - expr_0->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); + /// expr 0 + auto expr_0 = std::make_shared("greaterOrEquals"); + expr_0->getArguments().getNodes().push_back(std::make_shared(offset)); + expr_0->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); - /// expr 1 - auto expr_1 = std::make_shared(limit > 0); + /// expr 1 + auto expr_1 = std::make_shared(limit > 0); - auto function_node = std::make_shared("multiIf"); - function_node->getArguments().getNodes().push_back(expr_0); - function_node->getArguments().getNodes().push_back(std::make_shared(0)); - function_node->getArguments().getNodes().push_back(expr_1); - function_node->getArguments().getNodes().push_back(expr_2); - function_node->getArguments().getNodes().push_back(expr_3); + auto function_node = std::make_shared("multiIf"); + function_node->getArguments().getNodes().push_back(expr_0); + function_node->getArguments().getNodes().push_back(std::make_shared(0)); + function_node->getArguments().getNodes().push_back(expr_1); + function_node->getArguments().getNodes().push_back(expr_2); + function_node->getArguments().getNodes().push_back(expr_3); - current_query_tree->getLimit() = std::move(function_node); + current_query_tree->getLimit() = std::move(function_node); + } } else if (limit > 0) current_query_tree->getLimit() = std::make_shared(limit); diff --git a/tests/queries/0_stateless/02565_analyzer_limit_settings.reference b/tests/queries/0_stateless/02565_analyzer_limit_settings.reference index 6f23097612e..87e9f407cc8 100644 --- a/tests/queries/0_stateless/02565_analyzer_limit_settings.reference +++ b/tests/queries/0_stateless/02565_analyzer_limit_settings.reference @@ -62,7 +62,6 @@ SELECT * FROM numbers(10); SELECT * FROM numbers(10) LIMIT 3 OFFSET 2; 3 4 -5 SELECT * FROM numbers(10) LIMIT 5 OFFSET 2; 3 4 From 1728d6ed2c15f363fccf2091d819a042e1ded820 Mon Sep 17 00:00:00 2001 From: Derek Chia Date: Thu, 9 Mar 2023 11:44:27 +0800 Subject: [PATCH 311/333] Update settings.md - Change Prometheus port to 9363 as listed in https://clickhouse.com/docs/en/guides/sre/network-ports/ --- .../server-configuration-parameters/settings.md | 12 ++++++------ 1 
file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 3fe815bc79a..99daddeeb99 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1318,12 +1318,12 @@ Settings: ``` xml - /metrics - 8001 - true - true - true - + /metrics + 9363 + true + true + true + ``` ## query_log {#server_configuration_parameters-query-log} From 0ad436aa2c25d534b7b05d48b14a5537ad4e1f88 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 9 Mar 2023 08:07:59 +0000 Subject: [PATCH 312/333] Revert debug changes --- src/Coordination/Changelog.cpp | 7 ------- tests/config/config.d/keeper_port.xml | 1 - 2 files changed, 8 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 13da4dffac8..ddedae4fa0f 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -211,20 +211,13 @@ public: void flush() { auto * file_buffer = tryGetFileBuffer(); - LOG_TRACE(log, "Trying to flush"); if (file_buffer) { /// Fsync file system if needed if (log_file_settings.force_sync) - { file_buffer->sync(); - } else - { - LOG_TRACE(log, "Flushing with next, in buffer {}", file_buffer->offset()); file_buffer->next(); - LOG_TRACE(log, "Flushed in total {}", file_buffer->count()); - } } } diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index 6c60c8936a9..cffd325e968 100644 --- a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -14,7 +14,6 @@ 1000000000000000 100000 - trace 0 From 7161bbeb8850e2ec15010c8dda376af7e4b4a34d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 9 Mar 2023 11:16:29 +0100 Subject: [PATCH 313/333] More improvements based on PR review --- src/Client/ClientBase.cpp | 11 +++-------- src/Client/TestHint.cpp | 10 ++++++++++ src/Client/TestHint.h | 3 +++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 4476678f0f4..876b1d1906d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1883,8 +1883,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) current_exception->rethrow(); } - if (std::find(hint.clientErrors().begin(), hint.clientErrors().end(), current_exception->code()) - == hint.clientErrors().end()) + if (!hint.hasExpectedClientError(current_exception->code())) { if (hint.hasClientErrors()) current_exception->addMessage("\nExpected client error: {}.", hint.clientErrors()); @@ -1944,9 +1943,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) fmt::print(stderr, "Expected server error code '{}' but got no server error (query: {}).\n", test_hint.serverErrors(), full_query); } - else if ( - std::find(test_hint.serverErrors().begin(), test_hint.serverErrors().end(), server_exception->code()) - == test_hint.serverErrors().end()) + else if (!test_hint.hasExpectedServerError(server_exception->code())) { error_matches_hint = false; fmt::print(stderr, "Expected server error code: {} but got: {} (query: {}).\n", @@ -1961,9 +1958,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) fmt::print(stderr, "Expected client error code '{}' but got no client error (query: {}).\n", test_hint.clientErrors(), full_query); } - else if ( - std::find(test_hint.clientErrors().begin(), 
test_hint.clientErrors().end(), client_exception->code()) - == test_hint.clientErrors().end()) + else if (!test_hint.hasExpectedClientError(client_exception->code())) { error_matches_hint = false; fmt::print(stderr, "Expected client error code '{}' but got '{}' (query: {}).\n", diff --git a/src/Client/TestHint.cpp b/src/Client/TestHint.cpp index 8cc91250721..b64882577ee 100644 --- a/src/Client/TestHint.cpp +++ b/src/Client/TestHint.cpp @@ -52,6 +52,16 @@ TestHint::TestHint(const String & query_) } } +bool TestHint::hasExpectedClientError(int error) +{ + return std::find(client_errors.begin(), client_errors.end(), error) != client_errors.end(); +} + +bool TestHint::hasExpectedServerError(int error) +{ + return std::find(server_errors.begin(), server_errors.end(), error) != server_errors.end(); +} + void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) { std::unordered_set commands{"echo", "echoOn", "echoOff"}; diff --git a/src/Client/TestHint.h b/src/Client/TestHint.h index fb1dbfb72dc..63f16b1dd97 100644 --- a/src/Client/TestHint.h +++ b/src/Client/TestHint.h @@ -62,6 +62,9 @@ public: bool hasClientErrors() { return !client_errors.empty(); } bool hasServerErrors() { return !server_errors.empty(); } + bool hasExpectedClientError(int error); + bool hasExpectedServerError(int error); + private: const String & query; ErrorVector server_errors{}; From 7bf1b8341440c90912db0d1c1f0f2e596bcf7de3 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 9 Mar 2023 12:37:57 +0100 Subject: [PATCH 314/333] add checks --- src/Functions/array/arrayReverse.cpp | 2 ++ src/Interpreters/OptimizeIfChains.cpp | 1 + 2 files changed, 3 insertions(+) diff --git a/src/Functions/array/arrayReverse.cpp b/src/Functions/array/arrayReverse.cpp index 912adbadc7c..654a6c4cebf 100644 --- a/src/Functions/array/arrayReverse.cpp +++ b/src/Functions/array/arrayReverse.cpp @@ -91,6 +91,8 @@ ColumnPtr FunctionArrayReverse::executeImpl(const ColumnsWithTypeAndName & argum || executeFixedString(*src_inner_col, offsets, *res_inner_col) || executeGeneric(*src_inner_col, offsets, *res_inner_col); + chassert(bool(src_nullable_col) == bool(res_nullable_col)); + if (src_nullable_col) if (!executeNumber(src_nullable_col->getNullMapColumn(), offsets, res_nullable_col->getNullMapColumn())) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of null map of the first argument of function {}", diff --git a/src/Interpreters/OptimizeIfChains.cpp b/src/Interpreters/OptimizeIfChains.cpp index ba4c7bcd95f..9a5f9bcb2e1 100644 --- a/src/Interpreters/OptimizeIfChains.cpp +++ b/src/Interpreters/OptimizeIfChains.cpp @@ -64,6 +64,7 @@ ASTs OptimizeIfChainsVisitor::ifChain(const ASTPtr & child) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST for function 'if'"); const auto * function_args = function_node->arguments->as(); + chassert(function_args); if (!function_args || function_args->children.size() != 3) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, From 433c226b1fc6770729f3c3d76bf33f03140c5a27 Mon Sep 17 00:00:00 2001 From: Pradeep Chhetri <30620077+chhetripradeep@users.noreply.github.com> Date: Thu, 9 Mar 2023 20:51:45 +0800 Subject: [PATCH 315/333] Revert "Revert "Add join_algorithm='grace_hash' to stress tests"" (#47372) --- tests/ci/stress.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 4f723dba101..12c40ea1f66 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -30,13 +30,15 @@ def get_options(i, 
upgrade_check): if i % 2 == 1: join_alg_num = i // 2 - if join_alg_num % 4 == 0: + if join_alg_num % 5 == 0: client_options.append("join_algorithm='parallel_hash'") - if join_alg_num % 4 == 1: + if join_alg_num % 5 == 1: client_options.append("join_algorithm='partial_merge'") - if join_alg_num % 4 == 2: + if join_alg_num % 5 == 2: client_options.append("join_algorithm='full_sorting_merge'") - if join_alg_num % 4 == 3: + if join_alg_num % 5 == 3: + client_options.append("join_algorithm='grace_hash'") + if join_alg_num % 5 == 4: client_options.append("join_algorithm='auto'") client_options.append("max_rows_in_join=1000") From 89de49ef3c3842ca47fd61cf93656ac99bd4b24c Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 9 Mar 2023 08:31:28 -0500 Subject: [PATCH 316/333] add tabs for GCS and S3 --- .../example-datasets/nyc-taxi.md | 82 +++++++++++++++---- 1 file changed, 64 insertions(+), 18 deletions(-) diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index 69098f63037..36bcb6f232e 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -5,17 +5,19 @@ sidebar_position: 2 description: Data for billions of taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009 --- +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # New York Taxi Data The New York taxi data consists of 3+ billion taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009. The dataset can be obtained in a couple of ways: -- insert the data directly into ClickHouse Cloud from S3 +- insert the data directly into ClickHouse Cloud from S3 or GCS - download prepared partitions -## Retrieve the Data from S3 +## Create the table trips -Let's grab a small subset of the data for getting familiar with it. The data is in TSV files in AWS S3, which is easily streamed into -ClickHouse Cloud using the `s3` table function. Start by creating a table for the taxi rides: +Start by creating a table for the taxi rides: ```sql CREATE TABLE trips ( @@ -38,9 +40,50 @@ CREATE TABLE trips ( dropoff_ntaname LowCardinality(String) ) ENGINE = MergeTree -PRIMARY KEY (pickup_datetime, dropoff_datetime) +PRIMARY KEY (pickup_datetime, dropoff_datetime); ``` +## Retrieve the Data from Object Storage + +Let's grab a small subset of the data for getting familiar with it. The data is in TSV files in object storage, which is easily streamed into +ClickHouse Cloud using the `s3` table function. + +The same data is stored in both S3 and GCS; choose either tab. 
+ + + + +The following command streams three files from a GCS bucket into the `trips` table (the `{0..2}` syntax is a wildcard for the values 0, 1, and 2): + +```sql +INSERT INTO trips +SELECT + trip_id, + pickup_datetime, + dropoff_datetime, + pickup_longitude, + pickup_latitude, + dropoff_longitude, + dropoff_latitude, + passenger_count, + trip_distance, + fare_amount, + extra, + tip_amount, + tolls_amount, + total_amount, + payment_type, + pickup_ntaname, + dropoff_ntaname +FROM s3( + 'https://storage.googleapis.com/clickhouse-public-datasets/nyc-taxi/trips_{0..2}.gz', + 'TabSeparatedWithNames' +); +``` + + + + The following command streams three files from an S3 bucket into the `trips` table (the `{0..2}` syntax is a wildcard for the values 0, 1, and 2): ```sql @@ -66,14 +109,17 @@ SELECT FROM s3( 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_{0..2}.gz', 'TabSeparatedWithNames' -) +); ``` + + + Let's see how many rows were inserted: ```sql SELECT count() -FROM trips +FROM trips; ``` Each TSV file has about 1M rows, and the three files have 3,000,317 rows. Let's look at a few rows: @@ -81,7 +127,7 @@ Each TSV file has about 1M rows, and the three files have 3,000,317 rows. Let's ```sql SELECT * FROM trips -LIMIT 10 +LIMIT 10; ``` Notice there are columns for the pickup and dropoff dates, geo coordinates, fare details, New York neighborhoods, and more: @@ -110,7 +156,7 @@ SELECT FROM trips GROUP BY pickup_ntaname ORDER BY count DESC -LIMIT 10 +LIMIT 10; ``` The result is: @@ -137,7 +183,7 @@ SELECT passenger_count, avg(total_amount) FROM trips -GROUP BY passenger_count +GROUP BY passenger_count; ``` ```response @@ -165,7 +211,7 @@ SELECT count(*) FROM trips GROUP BY passenger_count, year, distance -ORDER BY year, count(*) DESC +ORDER BY year, count(*) DESC; ``` The first part of the result is: @@ -211,7 +257,7 @@ If you will run the queries described below, you have to use the full table name Q1: ``` sql -SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type +SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type; ``` 0.490 seconds. @@ -219,7 +265,7 @@ SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type Q2: ``` sql -SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count +SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count; ``` 1.224 seconds. @@ -227,7 +273,7 @@ SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenge Q3: ``` sql -SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year +SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year; ``` 2.104 seconds. @@ -238,7 +284,7 @@ Q4: SELECT passenger_count, toYear(pickup_date) AS year, round(trip_distance) AS distance, count(*) FROM trips_mergetree GROUP BY passenger_count, year, distance -ORDER BY year, count(*) DESC +ORDER BY year, count(*) DESC; ``` 3.593 seconds. 
@@ -254,19 +300,19 @@ Creating a table on three servers: On each server: ``` sql -CREATE TABLE default.trips_mergetree_third ( trip_id UInt32, vendor_id Enum8('1' = 1, '2' = 2, 'CMT' = 3, 'VTS' = 4, 'DDS' = 5, 'B02512' = 10, 'B02598' = 11, 'B02617' = 12, 'B02682' = 13, 'B02764' = 14), pickup_date Date, pickup_datetime DateTime, dropoff_date Date, dropoff_datetime DateTime, store_and_fwd_flag UInt8, rate_code_id UInt8, pickup_longitude Float64, pickup_latitude Float64, dropoff_longitude Float64, dropoff_latitude Float64, passenger_count UInt8, trip_distance Float64, fare_amount Float32, extra Float32, mta_tax Float32, tip_amount Float32, tolls_amount Float32, ehail_fee Float32, improvement_surcharge Float32, total_amount Float32, payment_type_ Enum8('UNK' = 0, 'CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4), trip_type UInt8, pickup FixedString(25), dropoff FixedString(25), cab_type Enum8('yellow' = 1, 'green' = 2, 'uber' = 3), pickup_nyct2010_gid UInt8, pickup_ctlabel Float32, pickup_borocode UInt8, pickup_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), pickup_ct2010 FixedString(6), pickup_boroct2010 FixedString(7), pickup_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), pickup_ntacode FixedString(4), pickup_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. 
Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), pickup_puma UInt16, dropoff_nyct2010_gid UInt8, dropoff_ctlabel Float32, dropoff_borocode UInt8, dropoff_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), dropoff_ct2010 FixedString(6), dropoff_boroct2010 FixedString(7), dropoff_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), dropoff_ntacode FixedString(4), dropoff_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. 
Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), dropoff_puma UInt16) ENGINE = MergeTree(pickup_date, pickup_datetime, 8192) +CREATE TABLE default.trips_mergetree_third ( trip_id UInt32, vendor_id Enum8('1' = 1, '2' = 2, 'CMT' = 3, 'VTS' = 4, 'DDS' = 5, 'B02512' = 10, 'B02598' = 11, 'B02617' = 12, 'B02682' = 13, 'B02764' = 14), pickup_date Date, pickup_datetime DateTime, dropoff_date Date, dropoff_datetime DateTime, store_and_fwd_flag UInt8, rate_code_id UInt8, pickup_longitude Float64, pickup_latitude Float64, dropoff_longitude Float64, dropoff_latitude Float64, passenger_count UInt8, trip_distance Float64, fare_amount Float32, extra Float32, mta_tax Float32, tip_amount Float32, tolls_amount Float32, ehail_fee Float32, improvement_surcharge Float32, total_amount Float32, payment_type_ Enum8('UNK' = 0, 'CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4), trip_type UInt8, pickup FixedString(25), dropoff FixedString(25), cab_type Enum8('yellow' = 1, 'green' = 2, 'uber' = 3), pickup_nyct2010_gid UInt8, pickup_ctlabel Float32, pickup_borocode UInt8, pickup_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), pickup_ct2010 FixedString(6), pickup_boroct2010 FixedString(7), pickup_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), pickup_ntacode FixedString(4), pickup_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 
'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), pickup_puma UInt16, dropoff_nyct2010_gid UInt8, dropoff_ctlabel Float32, dropoff_borocode UInt8, dropoff_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), dropoff_ct2010 FixedString(6), dropoff_boroct2010 FixedString(7), dropoff_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), dropoff_ntacode FixedString(4), dropoff_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. 
Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), dropoff_puma UInt16) ENGINE = MergeTree(pickup_date, pickup_datetime, 8192); ``` On the source server: ``` sql -CREATE TABLE trips_mergetree_x3 AS trips_mergetree_third ENGINE = Distributed(perftest, default, trips_mergetree_third, rand()) +CREATE TABLE trips_mergetree_x3 AS trips_mergetree_third ENGINE = Distributed(perftest, default, trips_mergetree_third, rand()); ``` The following query redistributes data: ``` sql -INSERT INTO trips_mergetree_x3 SELECT * FROM trips_mergetree +INSERT INTO trips_mergetree_x3 SELECT * FROM trips_mergetree; ``` This takes 2454 seconds. From 504486e7b9f4769f41bfde17d6e4f06da07ad1c8 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 9 Mar 2023 08:34:05 -0500 Subject: [PATCH 317/333] correct heading --- docs/en/getting-started/example-datasets/nyc-taxi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index 36bcb6f232e..aee2da5a545 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -43,7 +43,7 @@ ENGINE = MergeTree PRIMARY KEY (pickup_datetime, dropoff_datetime); ``` -## Retrieve the Data from Object Storage +## Load the Data directly from Object Storage Let's grab a small subset of the data for getting familiar with it. The data is in TSV files in object storage, which is easily streamed into ClickHouse Cloud using the `s3` table function. From c69d2c45b7d3e6a4661b7be6f05ce1aa42e56206 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 9 Mar 2023 08:35:29 -0500 Subject: [PATCH 318/333] correct heading --- docs/en/getting-started/example-datasets/nyc-taxi.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index aee2da5a545..0dc317eeeae 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -115,6 +115,8 @@ FROM s3( +## Sample Queries + Let's see how many rows were inserted: ```sql From 687fe3ea5198a5fa29ca6338ea9b4065acfa8296 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 9 Mar 2023 08:39:44 -0500 Subject: [PATCH 319/333] add note about prepared partitions --- docs/en/getting-started/example-datasets/nyc-taxi.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index 0dc317eeeae..9730faa873c 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -237,6 +237,10 @@ The first part of the result is: ## Download of Prepared Partitions {#download-of-prepared-partitions} +:::note +The following steps provide information about the original dataset, and a method for loading prepared partitions into a self-managed ClickHouse server environment. 
+::: + See https://github.com/toddwschneider/nyc-taxi-data and http://tech.marksblogg.com/billion-nyc-taxi-rides-redshift.html for the description of a dataset and instructions for downloading. Downloading will result in about 227 GB of uncompressed data in CSV files. The download takes about an hour over a 1 Gbit connection (parallel downloading from s3.amazonaws.com recovers at least half of a 1 Gbit channel). From 3aa0c32101fadf683a644f47972c117173a99e8a Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 9 Mar 2023 14:50:13 +0100 Subject: [PATCH 320/333] Update docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md --- .../aggregate-functions/reference/exponentialmovingaverage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index 296aae41daa..5546ade1758 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -181,7 +181,7 @@ ORDER BY time ASC; └───────┴─────────────────────┴─────────────┴──────────┘ --- Calculate timeunit timeunit using toRelativeHourNum +-- Calculate timeunit using toRelativeHourNum SELECT value, time, From 1d78c1b6d9232703d4e77cc98772a5d97a04cce1 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 9 Mar 2023 14:45:26 +0000 Subject: [PATCH 321/333] Use force_sync=true in upgrade check --- docker/test/upgrade/run.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index ce8a56c777e..de1f92823b2 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -60,6 +60,13 @@ install_packages previous_release_package_folder export USE_S3_STORAGE_FOR_MERGE_TREE=1 # Previous version may not be ready for fault injections export ZOOKEEPER_FAULT_INJECTION=0 + +# force_sync=false doesn't work correctly on some older versions +sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ + | sed "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" \ + > /etc/clickhouse-server/config.d/keeper_port.xml.tmp +sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml + configure # But we still need default disk because some tables loaded only into it From b0931c89675c08ec41bfc51710a7cb6bd39ecc45 Mon Sep 17 00:00:00 2001 From: Clayton McClure <103603310+cmcclure-twilio@users.noreply.github.com> Date: Thu, 9 Mar 2023 07:49:00 -0700 Subject: [PATCH 322/333] Update copier to use group by to find partitions (#47386) --- programs/copier/ClusterCopier.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index bc882719a08..d3696f2cf12 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -1867,8 +1867,8 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti String query; { WriteBufferFromOwnString wb; - wb << "SELECT DISTINCT " << partition_name << " AS partition FROM" - << " " << getQuotedTable(task_shard.table_read_shard) << " ORDER BY partition DESC"; + wb << "SELECT " << partition_name << " AS partition FROM " + << getQuotedTable(task_shard.table_read_shard) << " GROUP BY partition ORDER BY partition DESC"; query = wb.str(); } From 53277831cc07642963e6a64425700be8046022a1 Mon Sep 17 00:00:00 2001 From: Tyler Hannan
Date: Thu, 9 Mar 2023 16:02:06 +0100 Subject: [PATCH 323/333] Remove dated warning scary language is inappropriately scary. --- docs/en/operations/utilities/clickhouse-local.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index a4fa5579638..79b8bc90634 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -14,10 +14,6 @@ Accepts data that represent tables and queries them using [ClickHouse SQL dialec By default `clickhouse-local` does not have access to data on the same host, but it supports loading server configuration using `--config-file` argument. -:::warning -It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error. -::: - For temporary data, a unique temporary data directory is created by default. ## Usage {#usage} From d81065ea646e482bd8f180d094fa20aed3be2904 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 9 Mar 2023 16:14:54 +0100 Subject: [PATCH 324/333] Fix headers in schema inference docs --- docs/en/interfaces/schema-inference.md | 80 +++++++++++++------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 25bdb0c36a3..e028b4a6d96 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -117,7 +117,7 @@ clickhouse-local --file='hobbies.jsonl' --table='hobbies' --query='SELECT * FROM 4 47 Brayan ['movies','skydiving'] ``` -# Using structure from insertion table {#using-structure-from-insertion-table} +## Using structure from insertion table {#using-structure-from-insertion-table} When table functions `file/s3/url/hdfs` are used to insert data into a table, there is an option to use the structure from the insertion table instead of extracting it from the data. @@ -222,7 +222,7 @@ INSERT INTO hobbies4 SELECT id, empty(hobbies) ? NULL : hobbies[1] FROM file(hob In this case, there are some operations performed on the column `hobbies` in the `SELECT` query to insert it into the table, so ClickHouse cannot use the structure from the insertion table, and schema inference will be used. -# Schema inference cache {#schema-inference-cache} +## Schema inference cache {#schema-inference-cache} For most input formats schema inference reads some data to determine its structure and this process can take some time. To prevent inferring the same schema every time ClickHouse read the data from the same file, the inferred schema is cached and when accessing the same file again, ClickHouse will use the schema from the cache. @@ -326,14 +326,14 @@ SELECT count() FROM system.schema_inference_cache WHERE storage='S3' └─────────┘ ``` -# Text formats {#text-formats} +## Text formats {#text-formats} For text formats, ClickHouse reads the data row by row, extracts column values according to the format, and then uses some recursive parsers and heuristics to determine the type for each value. The maximum number of rows read from the data in schema inference is controlled by the setting `input_format_max_rows_to_read_for_schema_inference` with default value 25000. 
By default, all inferred types are [Nullable](../sql-reference/data-types/nullable.md), but you can change this by setting `schema_inference_make_columns_nullable` (see examples in the [settings](#settings-for-text-formats) section). -## JSON formats {#json-formats} +### JSON formats {#json-formats} In JSON formats ClickHouse parses values according to the JSON specification and then tries to find the most appropriate data type for them. @@ -464,9 +464,9 @@ most likely this column contains only Nulls or empty Arrays/Maps. ... ``` -### JSON settings {#json-settings} +#### JSON settings {#json-settings} -#### input_format_json_read_objects_as_strings +##### input_format_json_read_objects_as_strings Enabling this setting allows reading nested JSON objects as strings. This setting can be used to read nested JSON objects without using JSON object type. @@ -486,7 +486,7 @@ DESC format(JSONEachRow, $$ └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -#### input_format_json_try_infer_numbers_from_strings +##### input_format_json_try_infer_numbers_from_strings Enabling this setting allows inferring numbers from string values. @@ -507,7 +507,7 @@ DESC format(JSONEachRow, $$ └───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -#### input_format_json_read_numbers_as_strings +##### input_format_json_read_numbers_as_strings Enabling this setting allows reading numeric values as strings. @@ -528,7 +528,7 @@ DESC format(JSONEachRow, $$ └───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -#### input_format_json_read_bools_as_numbers +##### input_format_json_read_bools_as_numbers Enabling this setting allows reading Bool values as numbers. @@ -549,7 +549,7 @@ DESC format(JSONEachRow, $$ └───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## CSV {#csv} +### CSV {#csv} In CSV format ClickHouse extracts column values from the row according to delimiters. ClickHouse expects all types except numbers and strings to be enclosed in double quotes. If the value is in double quotes, ClickHouse tries to parse the data inside quotes using the recursive parser and then tries to find the most appropriate data type for it. If the value is not in double quotes, ClickHouse tries to parse it as a number, @@ -726,7 +726,7 @@ $$) └──────────────┴───────────────┘ ``` -## TSV/TSKV {#tsv-tskv} +### TSV/TSKV {#tsv-tskv} In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using the recursive parser to determine the most appropriate type. If the type cannot be determined, ClickHouse treats this value as String. @@ -1019,7 +1019,7 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!') └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## CustomSeparated {#custom-separated} +### CustomSeparated {#custom-separated} In CustomSeparated format ClickHouse first extracts all column values from the row according to specified delimiters and then tries to infer the data type for each value according to escaping rule. 
@@ -1080,7 +1080,7 @@ $$) └────────┴───────────────┴────────────┘ ``` -## Template {#template} +### Template {#template} In Template format ClickHouse first extracts all column values from the row according to the specified template and then tries to infer the data type for each value according to its escaping rule. @@ -1120,7 +1120,7 @@ $$) └──────────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## Regexp {#regexp} +### Regexp {#regexp} Similar to Template, in Regexp format ClickHouse first extracts all column values from the row according to specified regular expression and then tries to infer data type for each value according to the specified escaping rule. @@ -1142,9 +1142,9 @@ Line: value_1=2, value_2="Some string 2", value_3="[4, 5, NULL]"$$) └──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## Settings for text formats {settings-for-text-formats} +### Settings for text formats {#settings-for-text-formats} -### input_format_max_rows_to_read_for_schema_inference +#### input_format_max_rows_to_read_for_schema_inference This setting controls the maximum number of rows to be read while schema inference. The more rows are read, the more time is spent on schema inference, but the greater the chance to @@ -1152,7 +1152,7 @@ correctly determine the types (especially when the data contains a lot of nulls) Default value: `25000`. -### column_names_for_schema_inference +#### column_names_for_schema_inference The list of column names to use in schema inference for formats without explicit column names. Specified names will be used instead of default `c1,c2,c3,...`. The format: `column1,column2,column3,...`. @@ -1169,7 +1169,7 @@ DESC format(TSV, 'Hello, World! 42 [1, 2, 3]') settings column_names_for_schema_ └──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -### schema_inference_hints +#### schema_inference_hints The list of column names and types to use in schema inference instead of automatically determined types. The format: 'column_name1 column_type1, column_name2 column_type2, ...'. This setting can be used to specify the types of columns that could not be determined automatically or for optimizing the schema. @@ -1189,7 +1189,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -### schema_inference_make_columns_nullable +#### schema_inference_make_columns_nullable Controls making inferred types `Nullable` in schema inference for formats without information about nullability. If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference. @@ -1232,7 +1232,7 @@ DESC format(JSONEachRow, $$ └─────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -### input_format_try_infer_integers +#### input_format_try_infer_integers If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from sample data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. 
@@ -1289,7 +1289,7 @@ DESC format(JSONEachRow, $$ └────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -### input_format_try_infer_datetimes +#### input_format_try_infer_datetimes If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`, @@ -1337,7 +1337,7 @@ DESC format(JSONEachRow, $$ Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format) -### input_format_try_infer_dates +#### input_format_try_infer_dates If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in sample data were successfully parsed as dates, the result type will be `Date`, @@ -1383,14 +1383,14 @@ DESC format(JSONEachRow, $$ └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -# Self describing formats {#self-describing-formats} +## Self describing formats {#self-describing-formats} Self-describing formats contain information about the structure of the data in the data itself, it can be some header with a description, a binary type tree, or some kind of table. To automatically infer a schema from files in such formats, ClickHouse reads a part of the data containing information about the types and converts it into a schema of the ClickHouse table. -## Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types} +### Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types} ClickHouse supports some text formats with the suffix -WithNamesAndTypes. This suffix means that the data contains two additional rows with column names and types before the actual data. While schema inference for such formats, ClickHouse reads the first two rows and extracts column names and types. @@ -1412,7 +1412,7 @@ $$) └──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## JSON formats with metadata {#json-with-metadata} +### JSON formats with metadata {#json-with-metadata} Some JSON input formats ([JSON](formats.md#json), [JSONCompact](formats.md#json-compact), [JSONColumnsWithMetadata](formats.md#jsoncolumnswithmetadata)) contain metadata with column names and types. In schema inference for such formats, ClickHouse reads this metadata. @@ -1465,7 +1465,7 @@ $$) └──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## Avro {#avro} +### Avro {#avro} In Avro format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: @@ -1485,7 +1485,7 @@ In Avro format ClickHouse reads its schema from the data and converts it to Clic Other Avro types are not supported. -## Parquet {#parquet} +### Parquet {#parquet} In Parquet format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: @@ -1513,7 +1513,7 @@ In Parquet format ClickHouse reads its schema from the data and converts it to C Other Parquet types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. 
-## Arrow {#arrow} +### Arrow {#arrow} In Arrow format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: @@ -1541,7 +1541,7 @@ In Arrow format ClickHouse reads its schema from the data and converts it to Cli Other Arrow types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. -## ORC {#orc} +### ORC {#orc} In ORC format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: @@ -1564,17 +1564,17 @@ In ORC format ClickHouse reads its schema from the data and converts it to Click Other ORC types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. -## Native {#native} +### Native {#native} Native format is used inside ClickHouse and contains the schema in the data. In schema inference, ClickHouse reads the schema from the data without any transformations. -# Formats with external schema {#formats-with-external-schema} +## Formats with external schema {#formats-with-external-schema} Such formats require a schema describing the data in a separate file in a specific schema language. To automatically infer a schema from files in such formats, ClickHouse reads external schema from a separate file and transforms it to a ClickHouse table schema. -# Protobuf {#protobuf} +### Protobuf {#protobuf} In schema inference for Protobuf format ClickHouse uses the following type matches: @@ -1592,7 +1592,7 @@ In schema inference for Protobuf format ClickHouse uses the following type match | `repeated T` | [Array(T)](../sql-reference/data-types/array.md) | | `message`, `group` | [Tuple](../sql-reference/data-types/tuple.md) | -# CapnProto {#capnproto} +### CapnProto {#capnproto} In schema inference for CapnProto format ClickHouse uses the following type matches: @@ -1615,13 +1615,13 @@ In schema inference for CapnProto format ClickHouse uses the following type matc | `struct` | [Tuple](../sql-reference/data-types/tuple.md) | | `union(T, Void)`, `union(Void, T)` | [Nullable(T)](../sql-reference/data-types/nullable.md) | -# Strong-typed binary formats {#strong-typed-binary-formats} +## Strong-typed binary formats {#strong-typed-binary-formats} In such formats, each serialized value contains information about its type (and possibly about its name), but there is no information about the whole table. In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows) and extracts the type (and possibly name) for each value from the data and then converts these types to ClickHouse types. -## MsgPack {msgpack} +### MsgPack {#msgpack} In MsgPack format there is no delimiter between rows, to use schema inference for this format you should specify the number of columns in the table using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the following type matches: @@ -1641,7 +1641,7 @@ using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. -## BSONEachRow {#bsoneachrow} +### BSONEachRow {#bsoneachrow} In BSONEachRow each row of data is presented as a BSON document. 
In schema inference ClickHouse reads BSON documents one by one and extracts values, names, and types from the data and then transforms these types to ClickHouse types using the following type matches: @@ -1661,11 +1661,11 @@ values, names, and types from the data and then transforms these types to ClickH By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. -# Formats with constant schema {#formats-with-constant-schema} +## Formats with constant schema {#formats-with-constant-schema} Data in such formats always have the same schema. -## LineAsString {#line-as-string} +### LineAsString {#line-as-string} In this format, ClickHouse reads the whole line from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `line`. @@ -1680,7 +1680,7 @@ DESC format(LineAsString, 'Hello\nworld!') └──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## JSONAsString {#json-as-string} +### JSONAsString {#json-as-string} In this format, ClickHouse reads the whole JSON object from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `json`. @@ -1695,7 +1695,7 @@ DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}') └──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## JSONAsObject {#json-as-object} +### JSONAsObject {#json-as-object} In this format, ClickHouse reads the whole JSON object from the data into a single column with `Object('json')` data type. Inferred type for this format is always `String` and the column name is `json`. 
From 95351bc2d365843387a9709cd8b936572ab3f929 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 9 Mar 2023 12:05:26 -0500 Subject: [PATCH 325/333] standardize admonitions --- .../database-engines/materialized-mysql.md | 4 +-- .../materialized-postgresql.md | 4 +-- docs/en/engines/database-engines/mysql.md | 2 +- .../table-engines/integrations/hdfs.md | 4 +-- .../table-engines/integrations/kafka.md | 2 +- .../integrations/materialized-postgresql.md | 2 +- .../table-engines/integrations/postgresql.md | 2 +- .../engines/table-engines/integrations/s3.md | 4 +-- .../mergetree-family/aggregatingmergetree.md | 2 +- .../mergetree-family/collapsingmergetree.md | 2 +- .../custom-partitioning-key.md | 4 +-- .../mergetree-family/graphitemergetree.md | 6 ++-- .../mergetree-family/invertedindexes.md | 2 +- .../mergetree-family/mergetree.md | 4 +-- .../mergetree-family/replacingmergetree.md | 4 +-- .../mergetree-family/replication.md | 2 +- .../mergetree-family/summingmergetree.md | 2 +- .../versionedcollapsingmergetree.md | 2 +- docs/en/interfaces/formats.md | 28 +++++++++---------- docs/en/interfaces/http.md | 4 +-- docs/en/interfaces/postgresql.md | 2 +- .../third-party/client-libraries.md | 2 +- .../en/interfaces/third-party/integrations.md | 2 +- docs/en/operations/access-rights.md | 4 +-- .../external-authenticators/kerberos.md | 6 ++-- docs/en/operations/opentelemetry.md | 2 +- docs/en/operations/query-cache.md | 2 +- .../settings.md | 4 +-- .../settings/merge-tree-settings.md | 2 +- .../operations/settings/settings-formats.md | 2 +- docs/en/operations/settings/settings-users.md | 4 +-- docs/en/operations/settings/settings.md | 18 ++++++------ docs/en/operations/storing-data.md | 2 +- docs/en/operations/system-tables/parts.md | 2 +- docs/en/operations/tips.md | 2 +- .../operations/utilities/clickhouse-copier.md | 2 +- .../parametric-functions.md | 4 +-- docs/en/sql-reference/data-types/float.md | 2 +- docs/en/sql-reference/data-types/json.md | 4 +-- .../data-types/special-data-types/interval.md | 2 +- .../external-dicts-dict-layout.md | 4 +-- .../external-dicts-dict-structure.md | 4 +-- docs/en/sql-reference/distributed-ddl.md | 4 +-- .../sql-reference/functions/hash-functions.md | 4 +-- .../sql-reference/functions/introspection.md | 2 +- .../sql-reference/functions/nlp-functions.md | 2 +- .../functions/other-functions.md | 8 +++--- docs/en/sql-reference/operators/exists.md | 2 +- docs/en/sql-reference/operators/index.md | 2 +- .../sql-reference/statements/alter/column.md | 2 +- .../statements/alter/constraint.md | 2 +- .../statements/create/row-policy.md | 2 +- .../sql-reference/statements/create/table.md | 8 +++--- .../sql-reference/statements/create/user.md | 2 +- docs/en/sql-reference/statements/delete.md | 2 +- docs/en/sql-reference/statements/optimize.md | 2 +- docs/en/sql-reference/statements/system.md | 2 +- docs/en/sql-reference/statements/watch.md | 4 +-- docs/en/sql-reference/table-functions/file.md | 2 +- docs/en/sql-reference/table-functions/hdfs.md | 2 +- .../table-functions/hdfsCluster.md | 2 +- .../en/sql-reference/table-functions/index.md | 2 +- docs/en/sql-reference/table-functions/s3.md | 2 +- .../table-functions/s3Cluster.md | 2 +- 64 files changed, 113 insertions(+), 113 deletions(-) diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index 899c8d024f1..9b2d1d7e1de 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -6,7 
+6,7 @@ sidebar_position: 70 # [experimental] MaterializedMySQL -:::warning +:::important This is an experimental feature that should not be used in production. ::: @@ -245,7 +245,7 @@ extra care needs to be taken. You may specify overrides for tables that do not exist yet. -:::warning +:::important It is easy to break replication with table overrides if not used with care. For example: * If an ALIAS column is added with a table override, and a column with the same name is later added to the source diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index b43f71a7576..7be520ee25e 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -54,7 +54,7 @@ After `MaterializedPostgreSQL` database is created, it does not automatically de ATTACH TABLE postgres_database.new_table; ``` -:::warning +:::important Before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1. ::: @@ -145,7 +145,7 @@ FROM pg_class WHERE oid = 'postgres_table'::regclass; ``` -:::warning +:::important Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. ::: diff --git a/docs/en/engines/database-engines/mysql.md b/docs/en/engines/database-engines/mysql.md index e4ff734d55f..e2c4f134a90 100644 --- a/docs/en/engines/database-engines/mysql.md +++ b/docs/en/engines/database-engines/mysql.md @@ -60,7 +60,7 @@ These variables are supported: - `version` - `max_allowed_packet` -:::warning +:::important By now these variables are stubs and don't correspond to anything. ::: diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 7c04a6594a6..c14d05aa03a 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -64,7 +64,7 @@ SELECT * FROM hdfs_engine_table LIMIT 2 - Indexes. - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not recommended. - :::warning Zero-copy replication is not ready for production + :::important Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: @@ -110,7 +110,7 @@ Table consists of all the files in both directories (all files should satisfy fo CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV') ``` -:::warning +:::important If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index 255ba06f056..86647fb6978 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -102,7 +102,7 @@ Examples: Deprecated Method for Creating a Table -:::warning +:::important Do not use this method in new projects. 
If possible, switch old projects to the method described above. ::: diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md index 11e7928c3ed..c9b3f351568 100644 --- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -52,6 +52,6 @@ PRIMARY KEY key; SELECT key, value, _version FROM postgresql_db.postgresql_replica; ``` -:::warning +:::important Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. ::: diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index b73d28c8508..d338a2a58bd 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -74,7 +74,7 @@ All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` samp PostgreSQL `Array` types are converted into ClickHouse arrays. -:::warning +:::important Be careful - in PostgreSQL an array data, created like a `type_name[]`, may contain multi-dimensional arrays of different dimensions in different table rows in same column. But in ClickHouse it is only allowed to have multidimensional arrays of the same count of dimensions in all table rows in same column. ::: diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 723425429a5..8e1a4d91cac 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -63,7 +63,7 @@ For more information about virtual columns see [here](../../../engines/table-eng - Indexes. - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not supported. - :::warning Zero-copy replication is not ready for production + :::important Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: @@ -78,7 +78,7 @@ For more information about virtual columns see [here](../../../engines/table-eng Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. -:::warning +:::important If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 9677f75a358..9b7f0cd2486 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -43,7 +43,7 @@ When creating an `AggregatingMergeTree` table the same [clauses](../../../engine Deprecated Method for Creating a Table -:::warning +:::important Do not use this method in new projects and, if possible, switch the old projects to the method described above. 
::: diff --git a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md index 0bd665116f0..9acff099557 100644 --- a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -45,7 +45,7 @@ When creating a `CollapsingMergeTree` table, the same [query clauses](../../../e Deprecated Method for Creating a Table -:::warning +:::important Do not use this method in new projects and, if possible, switch old projects to the method described above. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index b1e79c4c3fd..f01cd90ccf0 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -6,7 +6,7 @@ sidebar_label: Custom Partitioning Key # Custom Partitioning Key -:::warning +:::important In most cases you do not need a partition key, and in most other cases you do not need a partition key more granular than by months. You should never use too granular of partitioning. Don't partition your data by client identifiers or names. Instead, make a client identifier or name the first column in the ORDER BY expression. @@ -159,7 +159,7 @@ FROM session_log GROUP BY UserID; ``` -:::warning +:::important Performance of such a query heavily depends on the table layout. Because of that the optimisation is not enabled by default. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md index 104ec049ec4..d71004135f1 100644 --- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md @@ -55,7 +55,7 @@ When creating a `GraphiteMergeTree` table, the same [clauses](../../../engines/t Deprecated Method for Creating a Table -:::warning +:::important Do not use this method in new projects and, if possible, switch old projects to the method described above. ::: @@ -129,7 +129,7 @@ default ... ``` -:::warning +:::important Patterns must be strictly ordered: 1. Patterns without `function` or `retention`. @@ -263,6 +263,6 @@ Valid values: ``` -:::warning +:::important Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). ::: diff --git a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md index aa11258dc4a..91ee0313a69 100644 --- a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md @@ -15,7 +15,7 @@ tokenized cells of the string column. For example, the string cell "I will be a " wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more useful the resulting inverted index will be. -:::warning +:::important Inverted indexes are experimental and should not be used in production environments yet. 
They may change in the future in backward-incompatible ways, for example with respect to their DDL/DQL syntax or performance/compression characteristics. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index fc8060077b0..5dbe7602856 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -192,7 +192,7 @@ The `index_granularity` setting can be omitted because 8192 is the default value Deprecated Method for Creating a Table -:::warning +:::important Do not use this method in new projects. If possible, switch old projects to the method described above. ::: @@ -1087,7 +1087,7 @@ Other parameters: Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). - :::warning Zero-copy replication is not ready for production + :::important Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index f5d81182898..0b76500f2a6 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -30,7 +30,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md). -:::warning +:::important Uniqueness of rows is determined by the `ORDER BY` table section, not `PRIMARY KEY`. ::: @@ -96,7 +96,7 @@ When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/t Deprecated Method for Creating a Table -:::warning +:::important Do not use this method in new projects and, if possible, switch old projects to the method described above. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 37ab8ac9fd3..f9adad0fa6d 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -43,7 +43,7 @@ ClickHouse uses [ClickHouse Keeper](/docs/en/guides/sre/keeper/clickhouse-keeper To use replication, set parameters in the [zookeeper](/docs/en/operations/server-configuration-parameters/settings.md/#server-settings_zookeeper) server configuration section. -:::warning +:::important Don’t neglect the security setting. ClickHouse supports the `digest` [ACL scheme](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) of the ZooKeeper security subsystem. 
::: diff --git a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md index b2b6272c58e..bea37706939 100644 --- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md @@ -44,7 +44,7 @@ When creating a `SummingMergeTree` table the same [clauses](../../../engines/tab Deprecated Method for Creating a Table -:::warning +:::important Do not use this method in new projects and, if possible, switch the old projects to the method described above. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 2891907f79a..1a44569711a 100644 --- a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -58,7 +58,7 @@ When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../. Deprecated Method for Creating a Table -:::warning +:::important Do not use this method in new projects. If possible, switch old projects to the method described above. ::: diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index db2e773a685..fa15de16c71 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -205,7 +205,7 @@ Differs from the `TabSeparated` format in that the column names are written in t During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness. -:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from the input data will be mapped to the columns of the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -217,7 +217,7 @@ This format is also available under the name `TSVWithNames`. Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row. -:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from the input data will be mapped to the columns in the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -470,7 +470,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). 
-:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -480,7 +480,7 @@ Otherwise, the first row will be skipped. Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). -:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -500,7 +500,7 @@ There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [Templat Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). -:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -510,7 +510,7 @@ Otherwise, the first row will be skipped. Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). -:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -969,7 +969,7 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). -:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -979,7 +979,7 @@ Otherwise, the first row will be skipped. Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). 
-:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -991,7 +991,7 @@ the types from input data will be compared with the types of the corresponding c Differs from `JSONCompactStringsEachRow` in that in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). -:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -1001,7 +1001,7 @@ Otherwise, the first row will be skipped. Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). -:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -1120,7 +1120,7 @@ CREATE TABLE IF NOT EXISTS example_table - If `input_format_defaults_for_omitted_fields = 0`, then the default value for `x` and `a` equals `0` (as the default value for the `UInt32` data type). - If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`. -:::warning +:::important When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`. ::: @@ -1450,7 +1450,7 @@ Similar to [RowBinary](#rowbinary), but with added header: - [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N) - N `String`s specifying column names -:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. 
@@ -1464,7 +1464,7 @@ Similar to [RowBinary](#rowbinary), but with added header: - N `String`s specifying column names - N `String`s specifying column types -:::warning +:::important If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -1912,7 +1912,7 @@ SET format_avro_schema_registry_url = 'http://schema-registry'; SELECT * FROM topic1_stream; ``` -:::warning +:::important Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. Also you can use the `format_avro_schema_registry_url` setting of the `Kafka` table engine. ::: diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 9af6df0c87d..efa51bb34ea 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -445,7 +445,7 @@ Next are the configuration methods for different `type`. The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully. -:::warning +:::important To keep the default `handlers` such as` query`, `play`,` ping`, add the `` rule. ::: @@ -476,7 +476,7 @@ $ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost: max_final_threads 2 ``` -:::warning +:::important In one `predefined_query_handler` only supports one `query` of an insert type. ::: diff --git a/docs/en/interfaces/postgresql.md b/docs/en/interfaces/postgresql.md index 9ff83559787..0cc70cce571 100644 --- a/docs/en/interfaces/postgresql.md +++ b/docs/en/interfaces/postgresql.md @@ -54,7 +54,7 @@ default=> And that's it! You now have a PostgreSQL client connected to ClickHouse, and all commands and queries are executed on ClickHouse. -:::caution +:::important The PostgreSQL protocol currently only supports plain-text passwords. ::: diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index 0e065cb7179..4ce63ba647b 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -6,7 +6,7 @@ sidebar_label: Client Libraries # Client Libraries from Third-party Developers -:::warning +:::important ClickHouse Inc does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality. ::: diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 90a4f088be7..8197a1d1f58 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -6,7 +6,7 @@ sidebar_label: Integrations # Integration Libraries from Third-party Developers -:::warning Disclaimer +:::important Disclaimer ClickHouse, Inc. does **not** maintain the tools and libraries listed below and haven’t done extensive testing to ensure their quality. 
::: diff --git a/docs/en/operations/access-rights.md b/docs/en/operations/access-rights.md index 4c4a06dbe1e..38d32e3f1f7 100644 --- a/docs/en/operations/access-rights.md +++ b/docs/en/operations/access-rights.md @@ -24,7 +24,7 @@ You can configure access entities using: We recommend using SQL-driven workflow. Both of the configuration methods work simultaneously, so if you use the server configuration files for managing accounts and access rights, you can smoothly switch to SQL-driven workflow. -:::warning +:::important You can’t manage the same access entity by both configuration methods simultaneously. ::: @@ -102,7 +102,7 @@ Privileges can be granted to a role by the [GRANT](../sql-reference/statements/g Row policy is a filter that defines which of the rows are available to a user or a role. Row policy contains filters for one particular table, as well as a list of roles and/or users which should use this row policy. -:::warning +:::important Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. ::: diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md index 95944e96194..b49291f8e2e 100644 --- a/docs/en/operations/external-authenticators/kerberos.md +++ b/docs/en/operations/external-authenticators/kerberos.md @@ -59,11 +59,11 @@ With filtering by realm: ``` -:::warning +:::important You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication. ::: -:::warning +:::important `principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication. ::: @@ -103,7 +103,7 @@ Example (goes into `users.xml`): ``` -:::warning +:::important Note that Kerberos authentication cannot be used alongside with any other authentication mechanism. The presence of any other sections like `password` alongside `kerberos` will force ClickHouse to shutdown. ::: diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index 1de5a09db0c..7faa4b1ee7c 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -7,7 +7,7 @@ title: "[experimental] Tracing ClickHouse with OpenTelemetry" [OpenTelemetry](https://opentelemetry.io/) is an open standard for collecting traces and metrics from the distributed application. ClickHouse has some support for OpenTelemetry. -:::warning +:::important This is an experimental feature that will change in backwards-incompatible ways in future releases. ::: diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 1a486de7904..a7ac011c796 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -29,7 +29,7 @@ Transactionally inconsistent caching is traditionally provided by client tools o the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side. This reduces maintenance effort and avoids redundancy. -:::warning +:::important The query cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query processing) where wrong results are returned. 
::: diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 3fe815bc79a..4494ad39a0e 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -24,7 +24,7 @@ Default value: 3600. Data compression settings for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine tables. -:::warning +:::tip Don’t use it if you have just started using ClickHouse. ::: @@ -1367,7 +1367,7 @@ The following settings are available: Changed settings take effect immediately. -:::warning +:::important Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether. ::: diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 5bc174727ad..6290a23378c 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -289,7 +289,7 @@ Default value: 0 (seconds) When this setting has a value greater than than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled. -:::warning Zero-copy replication is not ready for production +:::important Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 919ebaf562f..b03f922d0d4 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -142,7 +142,7 @@ y Nullable(String) z IPv4 ``` -:::warning +:::important If the `schema_inference_hints` is not formated properly, or if there is a typo or a wrong datatype, etc... the whole schema_inference_hints will be ignored. ::: diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index b55d64fc4f7..ac0025a1137 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -118,8 +118,8 @@ To open access for user from any network, specify: ::/0 ``` -:::warning -It’s insecure to open access from any network unless you have a firewall properly configured or the server is not directly connected to Internet. +:::important +It is insecure to open access from any network unless you have a firewall properly configured or the server is not directly connected to Internet. ::: To open access only from localhost, specify: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 94dcf159ca9..809c7c3e837 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -460,7 +460,7 @@ Possible values: Changes the behaviour of join operations with `ANY` strictness. -:::warning +:::important This setting applies only for `JOIN` operations with [Join](../../engines/table-engines/special/join.md) engine tables. ::: @@ -550,7 +550,7 @@ Default value: 64. Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations. -:::warning +:::important Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour. 
::: @@ -942,7 +942,7 @@ Higher values will lead to higher memory usage. The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying a smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. -:::warning +:::important This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. ::: @@ -960,7 +960,7 @@ We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data won’t be decompressed. -:::warning +:::important This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. ::: @@ -1247,7 +1247,7 @@ Possible values: Default value: 1. -:::warning +:::important Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key). If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas. If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects. @@ -1277,7 +1277,7 @@ Default value: `1`. This options will produce different results depending on the settings used. -:::warning +:::important This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details. ::: @@ -2186,7 +2186,7 @@ Default value: 0. This setting also affects broken batches (that may appears because of abnormal server (machine) termination and no `fsync_after_insert`/`fsync_directories` for [Distributed](../../engines/table-engines/special/distributed.md) table engine). ::: -:::warning +:::important You should not rely on automatic batch splitting, since this may hurt performance. ::: @@ -2194,7 +2194,7 @@ You should not rely on automatic batch splitting, since this may hurt performanc Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core. -:::warning +:::important To use this setting, you need to set the `CAP_SYS_NICE` capability. The `clickhouse-server` package sets it up during installation. Some virtual environments do not allow you to set the `CAP_SYS_NICE` capability. In this case, `clickhouse-server` shows a message about it at the start. ::: @@ -2858,7 +2858,7 @@ Possible values: Default value: `0`. -:::warning +:::important Nullable primary key usually indicates bad design. It is forbidden in almost all main stream DBMS. The feature is mainly for [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) and is not heavily tested. Use with care. 
::: diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 3f9a0f67187..21c3bf84250 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -471,6 +471,6 @@ Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#htt Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. -:::warning Zero-copy replication is not ready for production +:::important Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index 106d3c59dea..c9477214fd6 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -99,7 +99,7 @@ Columns: - `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl). -:::warning +:::important The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simpliest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields. ::: diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index da34a6b7e9c..00e7155edc1 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -36,7 +36,7 @@ $ echo 0 | sudo tee /proc/sys/vm/overcommit_memory Use `perf top` to watch the time spent in the kernel for memory management. Permanent huge pages also do not need to be allocated. -:::warning +:::important If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. The recommended amount of RAM is 32 GB or more. You can use ClickHouse in a system with a small amount of RAM, even with 2 GB of RAM, but it requires additional tuning and can ingest at a low rate. ::: diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md index 87280bc3ba8..1b75530dda7 100644 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ b/docs/en/operations/utilities/clickhouse-copier.md @@ -8,7 +8,7 @@ sidebar_label: clickhouse-copier Copies data from the tables in one cluster to tables in another (or the same) cluster. -:::warning +:::important To get a consistent copy, the data in the source tables and partitions should not change during the entire process. ::: diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 40184c0aa02..0bd43034c1b 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -90,7 +90,7 @@ Checks whether the sequence contains an event chain that matches the pattern. sequenceMatch(pattern)(timestamp, cond1, cond2, ...) ``` -:::warning +:::important Events that occur at the same second may lay in the sequence in an undefined order affecting the result. 
::: @@ -176,7 +176,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched. -:::warning +:::important Events that occur at the same second may lay in the sequence in an undefined order affecting the result. ::: diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index 38c414fa8cd..7a5fa088f6e 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -6,7 +6,7 @@ sidebar_label: Float32, Float64 # Float32, Float64 -:::warning +:::important If you need accurate calculations, in particular if you work with financial or business data requiring a high precision you should consider using Decimal instead. Floats might lead to inaccurate results as illustrated below: ``` diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index d9099ba5ad3..29d6553b888 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -6,7 +6,7 @@ sidebar_label: JSON # JSON -:::warning +:::important This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/guides/developer/working-with-json/json-load-data.md) instead. ::: @@ -14,7 +14,7 @@ Stores JavaScript Object Notation (JSON) documents in a single column. `JSON` is an alias for `Object('json')`. -:::warning +:::important The JSON data type is an experimental feature. To use it, set `allow_experimental_object_type = 1`. ::: diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index 5169bc646c9..68494e52360 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -8,7 +8,7 @@ sidebar_label: Interval The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator. -:::warning +:::important `Interval` data type values can’t be stored in tables. ::: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 4dc6fd33849..7af5923e052 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -299,7 +299,7 @@ Example: The table contains discounts for each advertiser in the format: To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others). -:::warning +:::important Values of `range_min` and `range_max` should fit in `Int64` type. ::: @@ -588,7 +588,7 @@ Set a large enough cache size. You need to experiment to select the number of ce 3. Assess memory consumption using the `system.dictionaries` table. 4. 
Increase or decrease the number of cells until the required memory consumption is reached. -:::warning +:::important Do not use ClickHouse as a source, because it is slow to process queries with random reads. ::: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 8271a342941..06d1c817a13 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -60,7 +60,7 @@ ClickHouse supports the following types of keys: An xml structure can contain either `` or ``. DDL-query must contain single `PRIMARY KEY`. -:::warning +:::important You must not describe key as an attribute. ::: @@ -178,4 +178,4 @@ Configuration fields: ## Related Content -- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) \ No newline at end of file +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) diff --git a/docs/en/sql-reference/distributed-ddl.md b/docs/en/sql-reference/distributed-ddl.md index ff5155391be..d170f3765c2 100644 --- a/docs/en/sql-reference/distributed-ddl.md +++ b/docs/en/sql-reference/distributed-ddl.md @@ -18,6 +18,6 @@ In order to run these queries correctly, each host must have the same cluster de The local version of the query will eventually be executed on each host in the cluster, even if some hosts are currently not available. -:::warning +:::important The order for executing queries within a single host is guaranteed. -::: \ No newline at end of file +::: diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 69dc73e2fb0..833e0a40b2b 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -125,7 +125,7 @@ SELECT sipHash64Keyed((506097522914230528, 1084818905618843912), array('e','x',' Like [sipHash64](#hash_functions-siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits. -:::warning +:::important This 128-bit variant differs from the reference implementation and it's weaker. This version exists because, when it was written, there was no official 128-bit extension for SipHash. New projects should probably use [sipHash128Reference](#hash_functions-siphash128reference). @@ -165,7 +165,7 @@ Result: Same as [sipHash128](#hash_functions-siphash128) but additionally takes an explicit key argument instead of using a fixed key. -:::warning +:::important This 128-bit variant differs from the reference implementation and it's weaker. This version exists because, when it was written, there was no official 128-bit extension for SipHash. New projects should probably use [sipHash128ReferenceKeyed](#hash_functions-siphash128referencekeyed). 
diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 9357f75b8e6..78f763318df 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -8,7 +8,7 @@ sidebar_label: Introspection You can use functions described in this chapter to introspect [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) and [DWARF](https://en.wikipedia.org/wiki/DWARF) for query profiling. -:::warning +:::important These functions are slow and may impose security considerations. ::: diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index f68448af2be..62116fe50a3 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -5,7 +5,7 @@ sidebar_label: NLP title: "[experimental] Natural Language Processing functions" --- -:::warning +:::important This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it. ::: diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 7146484361e..0f95f16ffed 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -792,7 +792,7 @@ neighbor(column, offset[, default_value]) The result of the function depends on the affected data blocks and the order of data in the block. -:::warning +:::important It can reach the neighbor rows only inside the currently processed data block. ::: @@ -902,7 +902,7 @@ Result: Calculates the difference between successive row values ​​in the data block. Returns 0 for the first row and the difference from the previous row for each subsequent row. -:::warning +:::important It can reach the previous row only inside the currently processed data block. ::: @@ -986,7 +986,7 @@ Each event has a start time and an end time. The start time is included in the e The function calculates the total number of active (concurrent) events for each event start time. -:::warning +:::important Events must be ordered by the start time in ascending order. If this requirement is violated the function raises an exception. Every data block is processed separately. If events from different data blocks overlap then they can not be processed correctly. ::: @@ -1674,7 +1674,7 @@ Result: Accumulates states of an aggregate function for each row of a data block. -:::warning +:::important The state is reset for each new data block. ::: diff --git a/docs/en/sql-reference/operators/exists.md b/docs/en/sql-reference/operators/exists.md index 4bc29389c9c..6819048e28f 100644 --- a/docs/en/sql-reference/operators/exists.md +++ b/docs/en/sql-reference/operators/exists.md @@ -7,7 +7,7 @@ The `EXISTS` operator checks how many records are in the result of a subquery. I `EXISTS` can be used in a [WHERE](../../sql-reference/statements/select/where.md) clause. -:::warning +:::important References to main query tables and columns are not supported in a subquery. 
::: diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 0fe7ebbf4b6..98dee830cd4 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -229,7 +229,7 @@ Types of intervals: You can also use a string literal when setting the `INTERVAL` value. For example, `INTERVAL 1 HOUR` is identical to the `INTERVAL '1 hour'` or `INTERVAL '1' hour`. -:::warning +:::important Intervals with different types can’t be combined. You can’t use expressions like `INTERVAL 4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, `INTERVAL 25 HOUR`. You can use consecutive operations, like in the example below. ::: diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index d580efa4992..99958c31b92 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -75,7 +75,7 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified, Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly. -:::warning +:::important You can’t delete a column if it is referenced by [materialized view](/docs/en/sql-reference/statements/create/view.md/#materialized). Otherwise, it returns an error. ::: diff --git a/docs/en/sql-reference/statements/alter/constraint.md b/docs/en/sql-reference/statements/alter/constraint.md index 844b24d7374..0b19966798b 100644 --- a/docs/en/sql-reference/statements/alter/constraint.md +++ b/docs/en/sql-reference/statements/alter/constraint.md @@ -17,7 +17,7 @@ See more on [constraints](../../../sql-reference/statements/create/table.md#cons Queries will add or remove metadata about constraints from table so they are processed immediately. -:::warning +:::important Constraint check **will not be executed** on existing data if it was added. ::: diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md index 31ce9221eea..93bc5729c8e 100644 --- a/docs/en/sql-reference/statements/create/row-policy.md +++ b/docs/en/sql-reference/statements/create/row-policy.md @@ -7,7 +7,7 @@ title: "CREATE ROW POLICY" Creates a [row policy](../../../operations/access-rights.md#row-policy-management), i.e. a filter used to determine which rows a user can read from a table. -:::warning +:::important Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. ::: diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 50e74920e4b..7da1c9669ff 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -286,7 +286,7 @@ ENGINE = engine PRIMARY KEY(expr1[, expr2,...]); ``` -:::warning +:::important You can't combine both ways in one query. ::: @@ -342,7 +342,7 @@ ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default); Codecs can be combined in a pipeline, for example, `CODEC(Delta, Default)`. -:::warning +:::important You can’t decompress ClickHouse database files with external utilities like `lz4`. Instead, use the special [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) utility. 
::: @@ -437,11 +437,11 @@ Encryption codecs: These codecs use a fixed nonce and encryption is therefore deterministic. This makes it compatible with deduplicating engines such as [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) but has a weakness: when the same data block is encrypted twice, the resulting ciphertext will be exactly the same so an adversary who can read the disk can see this equivalence (although only the equivalence, without getting its content). -:::warning +:::important Most engines including the "\*MergeTree" family create index files on disk without applying codecs. This means plaintext will appear on disk if an encrypted column is indexed. ::: -:::warning +:::important If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging. ::: diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index a756b3d4a0d..e2cf195ebd6 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -55,7 +55,7 @@ Another way of specifying host is to use `@` syntax following the username. Exam - `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. - `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. -:::warning +:::important ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so. ::: diff --git a/docs/en/sql-reference/statements/delete.md b/docs/en/sql-reference/statements/delete.md index e1987e50af4..e5a0a8f5b2b 100644 --- a/docs/en/sql-reference/statements/delete.md +++ b/docs/en/sql-reference/statements/delete.md @@ -32,7 +32,7 @@ SET allow_experimental_lightweight_delete = true; An [alternative way to delete rows](./alter/delete.md) in ClickHouse is `ALTER TABLE ... DELETE`, which might be more efficient if you do bulk deletes only occasionally and don't need the operation to be applied instantly. In most use cases the new lightweight `DELETE FROM` behavior will be considerably faster. -:::warning +:::important Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Lightweight deletes are currently efficient for wide parts, but for compact parts, they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios. ::: diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 78615a2f9ad..ee5ad512dd5 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -7,7 +7,7 @@ title: "OPTIMIZE Statement" This query tries to initialize an unscheduled merge of data parts for tables. -:::warning +:::important `OPTIMIZE` can’t fix the `Too many parts` error. ::: diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index f9f55acfcec..a7fd0944593 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -312,7 +312,7 @@ One may execute query after: Replica attaches locally found parts and sends info about them to Zookeeper. 
Parts present on a replica before metadata loss are not re-fetched from other ones if not being outdated (so replica restoration does not mean re-downloading all data over the network). -:::warning +:::important Parts in all states are moved to `detached/` folder. Parts active before data loss (committed) are attached. ::: diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md index 90d19e6be0e..e4220be60e9 100644 --- a/docs/en/sql-reference/statements/watch.md +++ b/docs/en/sql-reference/statements/watch.md @@ -6,7 +6,7 @@ sidebar_label: WATCH # WATCH Statement (Experimental) -:::warning +:::important This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`. ::: @@ -107,4 +107,4 @@ The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/s :::note The [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format should be used when watching [LIVE VIEW](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting. -::: \ No newline at end of file +::: diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 594c328c3ff..bf0fca648a3 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -109,7 +109,7 @@ Query the number of rows in all files of these two directories: SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); ``` -:::warning +:::tip If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 97a253a5356..784e45d0f81 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -79,7 +79,7 @@ SELECT count(*) FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32') ``` -:::warning +:::tip If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md index 231c552610f..78f56372310 100644 --- a/docs/en/sql-reference/table-functions/hdfsCluster.md +++ b/docs/en/sql-reference/table-functions/hdfsCluster.md @@ -50,7 +50,7 @@ SELECT count(*) FROM hdfsCluster('cluster_simple', 'hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32') ``` -:::warning +:::tip If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. 
::: diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md index b49c2f8da20..940fae73b66 100644 --- a/docs/en/sql-reference/table-functions/index.md +++ b/docs/en/sql-reference/table-functions/index.md @@ -20,6 +20,6 @@ You can use table functions in: - [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function) query. -:::warning +:::tip You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled. ::: diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index d7199717798..62b8150de69 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -112,7 +112,7 @@ FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/ └─────────┘ ``` -:::warning +:::tip If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index f420a69596c..504f92b4dc0 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -42,7 +42,7 @@ SELECT * FROM s3Cluster( Count the total amount of rows in all files in the cluster `cluster_simple`: -:::warning +:::tip If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: From 46979e383f2f893c18ec2a1ef021c973cf6f0d06 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 9 Mar 2023 18:21:47 +0000 Subject: [PATCH 326/333] Fix big numbers inference in CSV --- src/Formats/SchemaInferenceUtils.cpp | 3 +++ .../0_stateless/02587_csv_big_numbers_inference.reference | 4 ++++ .../queries/0_stateless/02587_csv_big_numbers_inference.sql | 5 +++++ 3 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/02587_csv_big_numbers_inference.reference create mode 100644 tests/queries/0_stateless/02587_csv_big_numbers_inference.sql diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 00eb686385d..7bd220e529b 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -984,6 +984,9 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting if (tryReadIntText(tmp_int, buf) && buf.eof()) return std::make_shared(); + /// We cam safely get back to the start of buffer, because we read from a string and we didn't reach eof. 
+ buf.position() = buf.buffer().begin(); + /// In case of Int64 overflow, try to infer UInt64 UInt64 tmp_uint; if (tryReadIntText(tmp_uint, buf) && buf.eof()) diff --git a/tests/queries/0_stateless/02587_csv_big_numbers_inference.reference b/tests/queries/0_stateless/02587_csv_big_numbers_inference.reference new file mode 100644 index 00000000000..5b38606d1fd --- /dev/null +++ b/tests/queries/0_stateless/02587_csv_big_numbers_inference.reference @@ -0,0 +1,4 @@ +c1 Nullable(Float64) +100000000000000000000 +c1 Nullable(Float64) +-100000000000000000000 diff --git a/tests/queries/0_stateless/02587_csv_big_numbers_inference.sql b/tests/queries/0_stateless/02587_csv_big_numbers_inference.sql new file mode 100644 index 00000000000..45a93034524 --- /dev/null +++ b/tests/queries/0_stateless/02587_csv_big_numbers_inference.sql @@ -0,0 +1,5 @@ +desc format('CSV', '100000000000000000000'); +select * from format('CSV', '100000000000000000000'); +desc format('CSV', '-100000000000000000000'); +select * from format('CSV', '-100000000000000000000'); + From 7cbf77a178d1815e606262a9927fc62178ce782f Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 9 Mar 2023 19:55:58 +0100 Subject: [PATCH 327/333] Update LRUFileCachePriority.cpp --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 8010b9c682b..c20379e1fc1 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -34,7 +34,7 @@ IFileCachePriority::WriteIterator LRUFileCachePriority::add(const Key & key, siz CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); CurrentMetrics::add(CurrentMetrics::FilesystemCacheElements); - LOG_TRACE(log, "Added entry into LRU queue, key: {}, offset: {}", key.toString(), offset); + LOG_TEST(log, "Added entry into LRU queue, key: {}, offset: {}", key.toString(), offset); return std::make_shared(this, iter); } @@ -54,7 +54,7 @@ void LRUFileCachePriority::removeAll(std::lock_guard &) CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, cache_size); CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements, queue.size()); - LOG_TRACE(log, "Removed all entries from LRU queue"); + LOG_TEST(log, "Removed all entries from LRU queue"); queue.clear(); cache_size = 0; @@ -88,7 +88,7 @@ void LRUFileCachePriority::LRUFileCacheIterator::removeAndGetNext(std::lock_guar CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, queue_iter->size); CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements); - LOG_TRACE(cache_priority->log, "Removed entry from LRU queue, key: {}, offset: {}", queue_iter->key.toString(), queue_iter->offset); + LOG_TEST(cache_priority->log, "Removed entry from LRU queue, key: {}, offset: {}", queue_iter->key.toString(), queue_iter->offset); queue_iter = cache_priority->queue.erase(queue_iter); } From de68dade71add8c4351d35ae1f8b35ec34b3fcd1 Mon Sep 17 00:00:00 2001 From: Rich Raposa Date: Thu, 9 Mar 2023 14:30:40 -0700 Subject: [PATCH 328/333] Revert "standardize admonitions" --- .../database-engines/materialized-mysql.md | 4 +-- .../materialized-postgresql.md | 4 +-- docs/en/engines/database-engines/mysql.md | 2 +- .../table-engines/integrations/hdfs.md | 4 +-- .../table-engines/integrations/kafka.md | 2 +- .../integrations/materialized-postgresql.md | 2 +- .../table-engines/integrations/postgresql.md | 2 +- 
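A note on PATCH 326/333 ("Fix big numbers inference in CSV") above: the hunk makes integer inference restartable. If parsing the field as Int64 fails or overflows, the read buffer is rewound to its beginning (`buf.position() = buf.buffer().begin()`) — safe because the whole field is held in memory — and the parse is retried as UInt64; values that overflow both, such as `100000000000000000000` in the new test, end up inferred as `Nullable(Float64)`. Below is a minimal standalone sketch of that rewind-and-retry idea using only the C++ standard library; the names `inferNumber`/`Inferred` and the exact fallback order are illustrative assumptions, not ClickHouse's actual `ReadBuffer`/`tryReadIntText` code.

```cpp
// Sketch only: standard-library analogue of the rewind-and-retry inference in
// PATCH 326/333. Names here (inferNumber, Inferred) are illustrative and do not
// exist in ClickHouse.
#include <charconv>
#include <cstdint>
#include <iostream>
#include <optional>
#include <string_view>
#include <variant>

using Inferred = std::variant<int64_t, uint64_t, double>; // Int64 / UInt64 / Float64

std::optional<Inferred> inferNumber(std::string_view field)
{
    const char * first = field.data();
    const char * last = first + field.size();

    // First attempt: Int64.
    int64_t as_int = 0;
    if (auto [ptr, ec] = std::from_chars(first, last, as_int); ec == std::errc{} && ptr == last)
        return Inferred{as_int};

    // "Rewind" to the start of the field and retry as UInt64, covering positive
    // values that overflow Int64 (the analogue of buf.position() = buf.buffer().begin()).
    uint64_t as_uint = 0;
    if (auto [ptr, ec] = std::from_chars(first, last, as_uint); ec == std::errc{} && ptr == last)
        return Inferred{as_uint};

    // Final fallback: Float64, which is how 100000000000000000000 from the new
    // test ends up as Nullable(Float64).
    double as_float = 0.0;
    if (auto [ptr, ec] = std::from_chars(first, last, as_float); ec == std::errc{} && ptr == last)
        return Inferred{as_float};

    return std::nullopt; // not a number at all
}

int main()
{
    for (std::string_view s : {"42", "18446744073709551615", "100000000000000000000", "-100000000000000000000"})
    {
        if (auto v = inferNumber(s))
            std::cout << s << " -> variant index " << v->index() << " (0=Int64, 1=UInt64, 2=Float64)\n";
        else
            std::cout << s << " -> not a number\n";
    }
    return 0;
}
```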
.../engines/table-engines/integrations/s3.md | 4 +-- .../mergetree-family/aggregatingmergetree.md | 2 +- .../mergetree-family/collapsingmergetree.md | 2 +- .../custom-partitioning-key.md | 4 +-- .../mergetree-family/graphitemergetree.md | 6 ++-- .../mergetree-family/invertedindexes.md | 2 +- .../mergetree-family/mergetree.md | 4 +-- .../mergetree-family/replacingmergetree.md | 4 +-- .../mergetree-family/replication.md | 2 +- .../mergetree-family/summingmergetree.md | 2 +- .../versionedcollapsingmergetree.md | 2 +- docs/en/interfaces/formats.md | 28 +++++++++---------- docs/en/interfaces/http.md | 4 +-- docs/en/interfaces/postgresql.md | 2 +- .../third-party/client-libraries.md | 2 +- .../en/interfaces/third-party/integrations.md | 2 +- docs/en/operations/access-rights.md | 4 +-- .../external-authenticators/kerberos.md | 6 ++-- docs/en/operations/opentelemetry.md | 2 +- docs/en/operations/query-cache.md | 2 +- .../settings.md | 4 +-- .../settings/merge-tree-settings.md | 2 +- .../operations/settings/settings-formats.md | 2 +- docs/en/operations/settings/settings-users.md | 4 +-- docs/en/operations/settings/settings.md | 18 ++++++------ docs/en/operations/storing-data.md | 2 +- docs/en/operations/system-tables/parts.md | 2 +- docs/en/operations/tips.md | 2 +- .../operations/utilities/clickhouse-copier.md | 2 +- .../parametric-functions.md | 4 +-- docs/en/sql-reference/data-types/float.md | 2 +- docs/en/sql-reference/data-types/json.md | 4 +-- .../data-types/special-data-types/interval.md | 2 +- .../external-dicts-dict-layout.md | 4 +-- .../external-dicts-dict-structure.md | 4 +-- docs/en/sql-reference/distributed-ddl.md | 4 +-- .../sql-reference/functions/hash-functions.md | 4 +-- .../sql-reference/functions/introspection.md | 2 +- .../sql-reference/functions/nlp-functions.md | 2 +- .../functions/other-functions.md | 8 +++--- docs/en/sql-reference/operators/exists.md | 2 +- docs/en/sql-reference/operators/index.md | 2 +- .../sql-reference/statements/alter/column.md | 2 +- .../statements/alter/constraint.md | 2 +- .../statements/create/row-policy.md | 2 +- .../sql-reference/statements/create/table.md | 8 +++--- .../sql-reference/statements/create/user.md | 2 +- docs/en/sql-reference/statements/delete.md | 2 +- docs/en/sql-reference/statements/optimize.md | 2 +- docs/en/sql-reference/statements/system.md | 2 +- docs/en/sql-reference/statements/watch.md | 4 +-- docs/en/sql-reference/table-functions/file.md | 2 +- docs/en/sql-reference/table-functions/hdfs.md | 2 +- .../table-functions/hdfsCluster.md | 2 +- .../en/sql-reference/table-functions/index.md | 2 +- docs/en/sql-reference/table-functions/s3.md | 2 +- .../table-functions/s3Cluster.md | 2 +- 64 files changed, 113 insertions(+), 113 deletions(-) diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index 9b2d1d7e1de..899c8d024f1 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -6,7 +6,7 @@ sidebar_position: 70 # [experimental] MaterializedMySQL -:::important +:::warning This is an experimental feature that should not be used in production. ::: @@ -245,7 +245,7 @@ extra care needs to be taken. You may specify overrides for tables that do not exist yet. -:::important +:::warning It is easy to break replication with table overrides if not used with care. 
For example: * If an ALIAS column is added with a table override, and a column with the same name is later added to the source diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index 7be520ee25e..b43f71a7576 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -54,7 +54,7 @@ After `MaterializedPostgreSQL` database is created, it does not automatically de ATTACH TABLE postgres_database.new_table; ``` -:::important +:::warning Before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1. ::: @@ -145,7 +145,7 @@ FROM pg_class WHERE oid = 'postgres_table'::regclass; ``` -:::important +:::warning Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. ::: diff --git a/docs/en/engines/database-engines/mysql.md b/docs/en/engines/database-engines/mysql.md index e2c4f134a90..e4ff734d55f 100644 --- a/docs/en/engines/database-engines/mysql.md +++ b/docs/en/engines/database-engines/mysql.md @@ -60,7 +60,7 @@ These variables are supported: - `version` - `max_allowed_packet` -:::important +:::warning By now these variables are stubs and don't correspond to anything. ::: diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index c14d05aa03a..7c04a6594a6 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -64,7 +64,7 @@ SELECT * FROM hdfs_engine_table LIMIT 2 - Indexes. - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not recommended. - :::important Zero-copy replication is not ready for production + :::warning Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: @@ -110,7 +110,7 @@ Table consists of all the files in both directories (all files should satisfy fo CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV') ``` -:::important +:::warning If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index 86647fb6978..255ba06f056 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -102,7 +102,7 @@ Examples: Deprecated Method for Creating a Table -:::important +:::warning Do not use this method in new projects. If possible, switch old projects to the method described above. 
::: diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md index c9b3f351568..11e7928c3ed 100644 --- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -52,6 +52,6 @@ PRIMARY KEY key; SELECT key, value, _version FROM postgresql_db.postgresql_replica; ``` -:::important +:::warning Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. ::: diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index d338a2a58bd..b73d28c8508 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -74,7 +74,7 @@ All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` samp PostgreSQL `Array` types are converted into ClickHouse arrays. -:::important +:::warning Be careful - in PostgreSQL an array data, created like a `type_name[]`, may contain multi-dimensional arrays of different dimensions in different table rows in same column. But in ClickHouse it is only allowed to have multidimensional arrays of the same count of dimensions in all table rows in same column. ::: diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 8e1a4d91cac..723425429a5 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -63,7 +63,7 @@ For more information about virtual columns see [here](../../../engines/table-eng - Indexes. - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not supported. - :::important Zero-copy replication is not ready for production + :::warning Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: @@ -78,7 +78,7 @@ For more information about virtual columns see [here](../../../engines/table-eng Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. -:::important +:::warning If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 9b7f0cd2486..9677f75a358 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -43,7 +43,7 @@ When creating an `AggregatingMergeTree` table the same [clauses](../../../engine Deprecated Method for Creating a Table -:::important +:::warning Do not use this method in new projects and, if possible, switch the old projects to the method described above. 
::: diff --git a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md index 9acff099557..0bd665116f0 100644 --- a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -45,7 +45,7 @@ When creating a `CollapsingMergeTree` table, the same [query clauses](../../../e Deprecated Method for Creating a Table -:::important +:::warning Do not use this method in new projects and, if possible, switch old projects to the method described above. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index f01cd90ccf0..b1e79c4c3fd 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -6,7 +6,7 @@ sidebar_label: Custom Partitioning Key # Custom Partitioning Key -:::important +:::warning In most cases you do not need a partition key, and in most other cases you do not need a partition key more granular than by months. You should never use too granular of partitioning. Don't partition your data by client identifiers or names. Instead, make a client identifier or name the first column in the ORDER BY expression. @@ -159,7 +159,7 @@ FROM session_log GROUP BY UserID; ``` -:::important +:::warning Performance of such a query heavily depends on the table layout. Because of that the optimisation is not enabled by default. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md index d71004135f1..104ec049ec4 100644 --- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md @@ -55,7 +55,7 @@ When creating a `GraphiteMergeTree` table, the same [clauses](../../../engines/t Deprecated Method for Creating a Table -:::important +:::warning Do not use this method in new projects and, if possible, switch old projects to the method described above. ::: @@ -129,7 +129,7 @@ default ... ``` -:::important +:::warning Patterns must be strictly ordered: 1. Patterns without `function` or `retention`. @@ -263,6 +263,6 @@ Valid values: ``` -:::important +:::warning Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). ::: diff --git a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md index 91ee0313a69..aa11258dc4a 100644 --- a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md @@ -15,7 +15,7 @@ tokenized cells of the string column. For example, the string cell "I will be a " wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more useful the resulting inverted index will be. -:::important +:::warning Inverted indexes are experimental and should not be used in production environments yet. 
They may change in the future in backward-incompatible ways, for example with respect to their DDL/DQL syntax or performance/compression characteristics. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 5dbe7602856..fc8060077b0 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -192,7 +192,7 @@ The `index_granularity` setting can be omitted because 8192 is the default value Deprecated Method for Creating a Table -:::important +:::warning Do not use this method in new projects. If possible, switch old projects to the method described above. ::: @@ -1087,7 +1087,7 @@ Other parameters: Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). - :::important Zero-copy replication is not ready for production + :::warning Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 0b76500f2a6..f5d81182898 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -30,7 +30,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md). -:::important +:::warning Uniqueness of rows is determined by the `ORDER BY` table section, not `PRIMARY KEY`. ::: @@ -96,7 +96,7 @@ When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/t Deprecated Method for Creating a Table -:::important +:::warning Do not use this method in new projects and, if possible, switch old projects to the method described above. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index f9adad0fa6d..37ab8ac9fd3 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -43,7 +43,7 @@ ClickHouse uses [ClickHouse Keeper](/docs/en/guides/sre/keeper/clickhouse-keeper To use replication, set parameters in the [zookeeper](/docs/en/operations/server-configuration-parameters/settings.md/#server-settings_zookeeper) server configuration section. -:::important +:::warning Don’t neglect the security setting. ClickHouse supports the `digest` [ACL scheme](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) of the ZooKeeper security subsystem. 
::: diff --git a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md index bea37706939..b2b6272c58e 100644 --- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md @@ -44,7 +44,7 @@ When creating a `SummingMergeTree` table the same [clauses](../../../engines/tab Deprecated Method for Creating a Table -:::important +:::warning Do not use this method in new projects and, if possible, switch the old projects to the method described above. ::: diff --git a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 1a44569711a..2891907f79a 100644 --- a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -58,7 +58,7 @@ When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../. Deprecated Method for Creating a Table -:::important +:::warning Do not use this method in new projects. If possible, switch old projects to the method described above. ::: diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index fa15de16c71..db2e773a685 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -205,7 +205,7 @@ Differs from the `TabSeparated` format in that the column names are written in t During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness. -:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from the input data will be mapped to the columns of the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -217,7 +217,7 @@ This format is also available under the name `TSVWithNames`. Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row. -:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from the input data will be mapped to the columns in the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -470,7 +470,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). 
-:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -480,7 +480,7 @@ Otherwise, the first row will be skipped. Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). -:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -500,7 +500,7 @@ There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [Templat Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). -:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -510,7 +510,7 @@ Otherwise, the first row will be skipped. Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). -:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -969,7 +969,7 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). -:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -979,7 +979,7 @@ Otherwise, the first row will be skipped. Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). 
-:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -991,7 +991,7 @@ the types from input data will be compared with the types of the corresponding c Differs from `JSONCompactStringsEachRow` in that in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). -:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -1001,7 +1001,7 @@ Otherwise, the first row will be skipped. Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). -:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -1120,7 +1120,7 @@ CREATE TABLE IF NOT EXISTS example_table - If `input_format_defaults_for_omitted_fields = 0`, then the default value for `x` and `a` equals `0` (as the default value for the `UInt32` data type). - If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`. -:::important +:::warning When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`. ::: @@ -1450,7 +1450,7 @@ Similar to [RowBinary](#rowbinary), but with added header: - [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N) - N `String`s specifying column names -:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. 
@@ -1464,7 +1464,7 @@ Similar to [RowBinary](#rowbinary), but with added header: - N `String`s specifying column names - N `String`s specifying column types -:::important +:::warning If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -1912,7 +1912,7 @@ SET format_avro_schema_registry_url = 'http://schema-registry'; SELECT * FROM topic1_stream; ``` -:::important +:::warning Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. Also you can use the `format_avro_schema_registry_url` setting of the `Kafka` table engine. ::: diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index efa51bb34ea..9af6df0c87d 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -445,7 +445,7 @@ Next are the configuration methods for different `type`. The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully. -:::important +:::warning To keep the default `handlers` such as` query`, `play`,` ping`, add the `` rule. ::: @@ -476,7 +476,7 @@ $ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost: max_final_threads 2 ``` -:::important +:::warning In one `predefined_query_handler` only supports one `query` of an insert type. ::: diff --git a/docs/en/interfaces/postgresql.md b/docs/en/interfaces/postgresql.md index 0cc70cce571..9ff83559787 100644 --- a/docs/en/interfaces/postgresql.md +++ b/docs/en/interfaces/postgresql.md @@ -54,7 +54,7 @@ default=> And that's it! You now have a PostgreSQL client connected to ClickHouse, and all commands and queries are executed on ClickHouse. -:::important +:::caution The PostgreSQL protocol currently only supports plain-text passwords. ::: diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index 4ce63ba647b..0e065cb7179 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -6,7 +6,7 @@ sidebar_label: Client Libraries # Client Libraries from Third-party Developers -:::important +:::warning ClickHouse Inc does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality. ::: diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 8197a1d1f58..90a4f088be7 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -6,7 +6,7 @@ sidebar_label: Integrations # Integration Libraries from Third-party Developers -:::important Disclaimer +:::warning Disclaimer ClickHouse, Inc. does **not** maintain the tools and libraries listed below and haven’t done extensive testing to ensure their quality. 
::: diff --git a/docs/en/operations/access-rights.md b/docs/en/operations/access-rights.md index 38d32e3f1f7..4c4a06dbe1e 100644 --- a/docs/en/operations/access-rights.md +++ b/docs/en/operations/access-rights.md @@ -24,7 +24,7 @@ You can configure access entities using: We recommend using SQL-driven workflow. Both of the configuration methods work simultaneously, so if you use the server configuration files for managing accounts and access rights, you can smoothly switch to SQL-driven workflow. -:::important +:::warning You can’t manage the same access entity by both configuration methods simultaneously. ::: @@ -102,7 +102,7 @@ Privileges can be granted to a role by the [GRANT](../sql-reference/statements/g Row policy is a filter that defines which of the rows are available to a user or a role. Row policy contains filters for one particular table, as well as a list of roles and/or users which should use this row policy. -:::important +:::warning Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. ::: diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md index b49291f8e2e..95944e96194 100644 --- a/docs/en/operations/external-authenticators/kerberos.md +++ b/docs/en/operations/external-authenticators/kerberos.md @@ -59,11 +59,11 @@ With filtering by realm: ``` -:::important +:::warning You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication. ::: -:::important +:::warning `principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication. ::: @@ -103,7 +103,7 @@ Example (goes into `users.xml`): ``` -:::important +:::warning Note that Kerberos authentication cannot be used alongside with any other authentication mechanism. The presence of any other sections like `password` alongside `kerberos` will force ClickHouse to shutdown. ::: diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index 7faa4b1ee7c..1de5a09db0c 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -7,7 +7,7 @@ title: "[experimental] Tracing ClickHouse with OpenTelemetry" [OpenTelemetry](https://opentelemetry.io/) is an open standard for collecting traces and metrics from the distributed application. ClickHouse has some support for OpenTelemetry. -:::important +:::warning This is an experimental feature that will change in backwards-incompatible ways in future releases. ::: diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index a7ac011c796..1a486de7904 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -29,7 +29,7 @@ Transactionally inconsistent caching is traditionally provided by client tools o the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side. This reduces maintenance effort and avoids redundancy. -:::important +:::warning The query cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query processing) where wrong results are returned. 
::: diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 4494ad39a0e..3fe815bc79a 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -24,7 +24,7 @@ Default value: 3600. Data compression settings for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine tables. -:::tip +:::warning Don’t use it if you have just started using ClickHouse. ::: @@ -1367,7 +1367,7 @@ The following settings are available: Changed settings take effect immediately. -:::important +:::warning Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether. ::: diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 6290a23378c..5bc174727ad 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -289,7 +289,7 @@ Default value: 0 (seconds) When this setting has a value greater than than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled. -:::important Zero-copy replication is not ready for production +:::warning Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index b03f922d0d4..919ebaf562f 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -142,7 +142,7 @@ y Nullable(String) z IPv4 ``` -:::important +:::warning If the `schema_inference_hints` is not formated properly, or if there is a typo or a wrong datatype, etc... the whole schema_inference_hints will be ignored. ::: diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index ac0025a1137..b55d64fc4f7 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -118,8 +118,8 @@ To open access for user from any network, specify: ::/0 ``` -:::important -It is insecure to open access from any network unless you have a firewall properly configured or the server is not directly connected to Internet. +:::warning +It’s insecure to open access from any network unless you have a firewall properly configured or the server is not directly connected to Internet. ::: To open access only from localhost, specify: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 809c7c3e837..94dcf159ca9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -460,7 +460,7 @@ Possible values: Changes the behaviour of join operations with `ANY` strictness. -:::important +:::warning This setting applies only for `JOIN` operations with [Join](../../engines/table-engines/special/join.md) engine tables. ::: @@ -550,7 +550,7 @@ Default value: 64. Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations. -:::important +:::warning Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour. 
::: @@ -942,7 +942,7 @@ Higher values will lead to higher memory usage. The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying a smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. -:::important +:::warning This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. ::: @@ -960,7 +960,7 @@ We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data won’t be decompressed. -:::important +:::warning This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. ::: @@ -1247,7 +1247,7 @@ Possible values: Default value: 1. -:::important +:::warning Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key). If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas. If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects. @@ -1277,7 +1277,7 @@ Default value: `1`. This options will produce different results depending on the settings used. -:::important +:::warning This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details. ::: @@ -2186,7 +2186,7 @@ Default value: 0. This setting also affects broken batches (that may appears because of abnormal server (machine) termination and no `fsync_after_insert`/`fsync_directories` for [Distributed](../../engines/table-engines/special/distributed.md) table engine). ::: -:::important +:::warning You should not rely on automatic batch splitting, since this may hurt performance. ::: @@ -2194,7 +2194,7 @@ You should not rely on automatic batch splitting, since this may hurt performanc Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core. -:::important +:::warning To use this setting, you need to set the `CAP_SYS_NICE` capability. The `clickhouse-server` package sets it up during installation. Some virtual environments do not allow you to set the `CAP_SYS_NICE` capability. In this case, `clickhouse-server` shows a message about it at the start. ::: @@ -2858,7 +2858,7 @@ Possible values: Default value: `0`. -:::important +:::warning Nullable primary key usually indicates bad design. It is forbidden in almost all main stream DBMS. The feature is mainly for [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) and is not heavily tested. Use with care. 
::: diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 21c3bf84250..3f9a0f67187 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -471,6 +471,6 @@ Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#htt Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. -:::important Zero-copy replication is not ready for production +:::warning Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index c9477214fd6..106d3c59dea 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -99,7 +99,7 @@ Columns: - `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl). -:::important +:::warning The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simpliest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields. ::: diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 00e7155edc1..da34a6b7e9c 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -36,7 +36,7 @@ $ echo 0 | sudo tee /proc/sys/vm/overcommit_memory Use `perf top` to watch the time spent in the kernel for memory management. Permanent huge pages also do not need to be allocated. -:::important +:::warning If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. The recommended amount of RAM is 32 GB or more. You can use ClickHouse in a system with a small amount of RAM, even with 2 GB of RAM, but it requires additional tuning and can ingest at a low rate. ::: diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md index 1b75530dda7..87280bc3ba8 100644 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ b/docs/en/operations/utilities/clickhouse-copier.md @@ -8,7 +8,7 @@ sidebar_label: clickhouse-copier Copies data from the tables in one cluster to tables in another (or the same) cluster. -:::important +:::warning To get a consistent copy, the data in the source tables and partitions should not change during the entire process. ::: diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 0bd43034c1b..40184c0aa02 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -90,7 +90,7 @@ Checks whether the sequence contains an event chain that matches the pattern. sequenceMatch(pattern)(timestamp, cond1, cond2, ...) ``` -:::important +:::warning Events that occur at the same second may lay in the sequence in an undefined order affecting the result. 
::: @@ -176,7 +176,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched. -:::important +:::warning Events that occur at the same second may lay in the sequence in an undefined order affecting the result. ::: diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index 7a5fa088f6e..38c414fa8cd 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -6,7 +6,7 @@ sidebar_label: Float32, Float64 # Float32, Float64 -:::important +:::warning If you need accurate calculations, in particular if you work with financial or business data requiring a high precision you should consider using Decimal instead. Floats might lead to inaccurate results as illustrated below: ``` diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index 29d6553b888..d9099ba5ad3 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -6,7 +6,7 @@ sidebar_label: JSON # JSON -:::important +:::warning This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/guides/developer/working-with-json/json-load-data.md) instead. ::: @@ -14,7 +14,7 @@ Stores JavaScript Object Notation (JSON) documents in a single column. `JSON` is an alias for `Object('json')`. -:::important +:::warning The JSON data type is an experimental feature. To use it, set `allow_experimental_object_type = 1`. ::: diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index 68494e52360..5169bc646c9 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -8,7 +8,7 @@ sidebar_label: Interval The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator. -:::important +:::warning `Interval` data type values can’t be stored in tables. ::: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 7af5923e052..4dc6fd33849 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -299,7 +299,7 @@ Example: The table contains discounts for each advertiser in the format: To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others). -:::important +:::warning Values of `range_min` and `range_max` should fit in `Int64` type. ::: @@ -588,7 +588,7 @@ Set a large enough cache size. You need to experiment to select the number of ce 3. Assess memory consumption using the `system.dictionaries` table. 4. 
Increase or decrease the number of cells until the required memory consumption is reached. -:::important +:::warning Do not use ClickHouse as a source, because it is slow to process queries with random reads. ::: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 06d1c817a13..8271a342941 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -60,7 +60,7 @@ ClickHouse supports the following types of keys: An xml structure can contain either `` or ``. DDL-query must contain single `PRIMARY KEY`. -:::important +:::warning You must not describe key as an attribute. ::: @@ -178,4 +178,4 @@ Configuration fields: ## Related Content -- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) \ No newline at end of file diff --git a/docs/en/sql-reference/distributed-ddl.md b/docs/en/sql-reference/distributed-ddl.md index d170f3765c2..ff5155391be 100644 --- a/docs/en/sql-reference/distributed-ddl.md +++ b/docs/en/sql-reference/distributed-ddl.md @@ -18,6 +18,6 @@ In order to run these queries correctly, each host must have the same cluster de The local version of the query will eventually be executed on each host in the cluster, even if some hosts are currently not available. -:::important +:::warning The order for executing queries within a single host is guaranteed. -::: +::: \ No newline at end of file diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 833e0a40b2b..69dc73e2fb0 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -125,7 +125,7 @@ SELECT sipHash64Keyed((506097522914230528, 1084818905618843912), array('e','x',' Like [sipHash64](#hash_functions-siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits. -:::important +:::warning This 128-bit variant differs from the reference implementation and it's weaker. This version exists because, when it was written, there was no official 128-bit extension for SipHash. New projects should probably use [sipHash128Reference](#hash_functions-siphash128reference). @@ -165,7 +165,7 @@ Result: Same as [sipHash128](#hash_functions-siphash128) but additionally takes an explicit key argument instead of using a fixed key. -:::important +:::warning This 128-bit variant differs from the reference implementation and it's weaker. This version exists because, when it was written, there was no official 128-bit extension for SipHash. New projects should probably use [sipHash128ReferenceKeyed](#hash_functions-siphash128referencekeyed). 
diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 78f763318df..9357f75b8e6 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -8,7 +8,7 @@ sidebar_label: Introspection You can use functions described in this chapter to introspect [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) and [DWARF](https://en.wikipedia.org/wiki/DWARF) for query profiling. -:::important +:::warning These functions are slow and may impose security considerations. ::: diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index 62116fe50a3..f68448af2be 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -5,7 +5,7 @@ sidebar_label: NLP title: "[experimental] Natural Language Processing functions" --- -:::important +:::warning This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it. ::: diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 0f95f16ffed..7146484361e 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -792,7 +792,7 @@ neighbor(column, offset[, default_value]) The result of the function depends on the affected data blocks and the order of data in the block. -:::important +:::warning It can reach the neighbor rows only inside the currently processed data block. ::: @@ -902,7 +902,7 @@ Result: Calculates the difference between successive row values ​​in the data block. Returns 0 for the first row and the difference from the previous row for each subsequent row. -:::important +:::warning It can reach the previous row only inside the currently processed data block. ::: @@ -986,7 +986,7 @@ Each event has a start time and an end time. The start time is included in the e The function calculates the total number of active (concurrent) events for each event start time. -:::important +:::warning Events must be ordered by the start time in ascending order. If this requirement is violated the function raises an exception. Every data block is processed separately. If events from different data blocks overlap then they can not be processed correctly. ::: @@ -1674,7 +1674,7 @@ Result: Accumulates states of an aggregate function for each row of a data block. -:::important +:::warning The state is reset for each new data block. ::: diff --git a/docs/en/sql-reference/operators/exists.md b/docs/en/sql-reference/operators/exists.md index 6819048e28f..4bc29389c9c 100644 --- a/docs/en/sql-reference/operators/exists.md +++ b/docs/en/sql-reference/operators/exists.md @@ -7,7 +7,7 @@ The `EXISTS` operator checks how many records are in the result of a subquery. I `EXISTS` can be used in a [WHERE](../../sql-reference/statements/select/where.md) clause. -:::important +:::warning References to main query tables and columns are not supported in a subquery. 
::: diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 98dee830cd4..0fe7ebbf4b6 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -229,7 +229,7 @@ Types of intervals: You can also use a string literal when setting the `INTERVAL` value. For example, `INTERVAL 1 HOUR` is identical to the `INTERVAL '1 hour'` or `INTERVAL '1' hour`. -:::important +:::warning Intervals with different types can’t be combined. You can’t use expressions like `INTERVAL 4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, `INTERVAL 25 HOUR`. You can use consecutive operations, like in the example below. ::: diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 99958c31b92..d580efa4992 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -75,7 +75,7 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified, Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly. -:::important +:::warning You can’t delete a column if it is referenced by [materialized view](/docs/en/sql-reference/statements/create/view.md/#materialized). Otherwise, it returns an error. ::: diff --git a/docs/en/sql-reference/statements/alter/constraint.md b/docs/en/sql-reference/statements/alter/constraint.md index 0b19966798b..844b24d7374 100644 --- a/docs/en/sql-reference/statements/alter/constraint.md +++ b/docs/en/sql-reference/statements/alter/constraint.md @@ -17,7 +17,7 @@ See more on [constraints](../../../sql-reference/statements/create/table.md#cons Queries will add or remove metadata about constraints from table so they are processed immediately. -:::important +:::warning Constraint check **will not be executed** on existing data if it was added. ::: diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md index 93bc5729c8e..31ce9221eea 100644 --- a/docs/en/sql-reference/statements/create/row-policy.md +++ b/docs/en/sql-reference/statements/create/row-policy.md @@ -7,7 +7,7 @@ title: "CREATE ROW POLICY" Creates a [row policy](../../../operations/access-rights.md#row-policy-management), i.e. a filter used to determine which rows a user can read from a table. -:::important +:::warning Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. ::: diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 7da1c9669ff..50e74920e4b 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -286,7 +286,7 @@ ENGINE = engine PRIMARY KEY(expr1[, expr2,...]); ``` -:::important +:::warning You can't combine both ways in one query. ::: @@ -342,7 +342,7 @@ ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default); Codecs can be combined in a pipeline, for example, `CODEC(Delta, Default)`. -:::important +:::warning You can’t decompress ClickHouse database files with external utilities like `lz4`. Instead, use the special [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) utility. 
::: @@ -437,11 +437,11 @@ Encryption codecs: These codecs use a fixed nonce and encryption is therefore deterministic. This makes it compatible with deduplicating engines such as [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) but has a weakness: when the same data block is encrypted twice, the resulting ciphertext will be exactly the same so an adversary who can read the disk can see this equivalence (although only the equivalence, without getting its content). -:::important +:::warning Most engines including the "\*MergeTree" family create index files on disk without applying codecs. This means plaintext will appear on disk if an encrypted column is indexed. ::: -:::important +:::warning If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging. ::: diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index e2cf195ebd6..a756b3d4a0d 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -55,7 +55,7 @@ Another way of specifying host is to use `@` syntax following the username. Exam - `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. - `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. -:::important +:::warning ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so. ::: diff --git a/docs/en/sql-reference/statements/delete.md b/docs/en/sql-reference/statements/delete.md index e5a0a8f5b2b..e1987e50af4 100644 --- a/docs/en/sql-reference/statements/delete.md +++ b/docs/en/sql-reference/statements/delete.md @@ -32,7 +32,7 @@ SET allow_experimental_lightweight_delete = true; An [alternative way to delete rows](./alter/delete.md) in ClickHouse is `ALTER TABLE ... DELETE`, which might be more efficient if you do bulk deletes only occasionally and don't need the operation to be applied instantly. In most use cases the new lightweight `DELETE FROM` behavior will be considerably faster. -:::important +:::warning Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Lightweight deletes are currently efficient for wide parts, but for compact parts, they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios. ::: diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index ee5ad512dd5..78615a2f9ad 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -7,7 +7,7 @@ title: "OPTIMIZE Statement" This query tries to initialize an unscheduled merge of data parts for tables. -:::important +:::warning `OPTIMIZE` can’t fix the `Too many parts` error. ::: diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index a7fd0944593..f9f55acfcec 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -312,7 +312,7 @@ One may execute query after: Replica attaches locally found parts and sends info about them to Zookeeper. 
Parts present on a replica before metadata loss are not re-fetched from other ones if not being outdated (so replica restoration does not mean re-downloading all data over the network). -:::important +:::warning Parts in all states are moved to `detached/` folder. Parts active before data loss (committed) are attached. ::: diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md index e4220be60e9..90d19e6be0e 100644 --- a/docs/en/sql-reference/statements/watch.md +++ b/docs/en/sql-reference/statements/watch.md @@ -6,7 +6,7 @@ sidebar_label: WATCH # WATCH Statement (Experimental) -:::important +:::warning This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`. ::: @@ -107,4 +107,4 @@ The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/s :::note The [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format should be used when watching [LIVE VIEW](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting. -::: +::: \ No newline at end of file diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index bf0fca648a3..594c328c3ff 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -109,7 +109,7 @@ Query the number of rows in all files of these two directories: SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); ``` -:::tip +:::warning If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 784e45d0f81..97a253a5356 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -79,7 +79,7 @@ SELECT count(*) FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32') ``` -:::tip +:::warning If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md index 78f56372310..231c552610f 100644 --- a/docs/en/sql-reference/table-functions/hdfsCluster.md +++ b/docs/en/sql-reference/table-functions/hdfsCluster.md @@ -50,7 +50,7 @@ SELECT count(*) FROM hdfsCluster('cluster_simple', 'hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32') ``` -:::tip +:::warning If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. 
::: diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md index 940fae73b66..b49c2f8da20 100644 --- a/docs/en/sql-reference/table-functions/index.md +++ b/docs/en/sql-reference/table-functions/index.md @@ -20,6 +20,6 @@ You can use table functions in: - [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function) query. -:::tip +:::warning You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled. ::: diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 62b8150de69..d7199717798 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -112,7 +112,7 @@ FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/ └─────────┘ ``` -:::tip +:::warning If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 504f92b4dc0..f420a69596c 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -42,7 +42,7 @@ SELECT * FROM s3Cluster( Count the total amount of rows in all files in the cluster `cluster_simple`: -:::tip +:::warning If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: From 6f35d46ac8d9ee3484357015033e35c3912fff89 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 10 Mar 2023 05:01:06 +0300 Subject: [PATCH 329/333] Update SchemaInferenceUtils.cpp --- src/Formats/SchemaInferenceUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 7bd220e529b..011860948c3 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -984,7 +984,7 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting if (tryReadIntText(tmp_int, buf) && buf.eof()) return std::make_shared(); - /// We cam safely get back to the start of buffer, because we read from a string and we didn't reach eof. + /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. buf.position() = buf.buffer().begin(); /// In case of Int64 overflow, try to infer UInt64 @@ -993,7 +993,7 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting return std::make_shared(); } - /// We cam safely get back to the start of buffer, because we read from a string and we didn't reach eof. + /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. buf.position() = buf.buffer().begin(); Float64 tmp; From 40eed939cca48687ce2fa13b721a822b908b0c0c Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Fri, 10 Mar 2023 10:07:53 +0100 Subject: [PATCH 330/333] Update README.md Slacks invite links are unpleasant. We now redirect so we don't have to constantly change. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fcbe65e8223..17b4df154a9 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ curl https://clickhouse.com/ | sh * [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster. * [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. -* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. +* [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. From 9bcd93d959cfe0a64517a82fe0a9530ccc224b1b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 10 Mar 2023 17:47:42 +0100 Subject: [PATCH 331/333] Update InterpreterInsertQuery.cpp --- src/Interpreters/InterpreterInsertQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 3cae219fa60..b4a19ea7403 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -161,7 +161,7 @@ Block InterpreterInsertQuery::getSampleBlock( if (table_sample_physical.has(current_name)) { if (!allow_materialized) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is a MATERIALIZED column", + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", current_name); res.insert(ColumnWithTypeAndName(table_sample_physical.getByName(current_name).type, current_name)); } @@ -527,7 +527,7 @@ BlockIO InterpreterInsertQuery::execute() { for (const auto & column : metadata_snapshot->getColumns()) if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is a MATERIALIZED column", column.name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); } res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); From 363989f3f86ac96662624cf2272d5875b721233b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 10 Mar 2023 17:56:29 +0000 Subject: [PATCH 332/333] Update version_date.tsv and changelogs after v23.2.4.12-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.2.4.12-stable.md | 20 ++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 5 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.2.4.12-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 500249b5bd6..34c1406b687 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -29,7 +29,7 @@ RUN 
arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.2.3.17" +ARG VERSION="23.2.4.12" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 5f613eea4d0..f4ca498a7e7 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.2.3.17" +ARG VERSION="23.2.4.12" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 3e99bca8bc2..13b3ebdb01c 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.2.3.17" +ARG VERSION="23.2.4.12" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.2.4.12-stable.md b/docs/changelogs/v23.2.4.12-stable.md new file mode 100644 index 00000000000..2b6a689aee5 --- /dev/null +++ b/docs/changelogs/v23.2.4.12-stable.md @@ -0,0 +1,20 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.4.12-stable (8fe866cb035) FIXME as compared to v23.2.3.17-stable (dec18bf7281) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#47277](https://github.com/ClickHouse/ClickHouse/issues/47277): Fix IPv4/IPv6 serialization/deserialization in binary formats that was broken in https://github.com/ClickHouse/ClickHouse/pull/43221. Closes [#46522](https://github.com/ClickHouse/ClickHouse/issues/46522). [#46616](https://github.com/ClickHouse/ClickHouse/pull/46616) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#47212](https://github.com/ClickHouse/ClickHouse/issues/47212): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to not working deduplication. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#47363](https://github.com/ClickHouse/ClickHouse/issues/47363): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 34bc3f646fc..50a9314bc00 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.2.4.12-stable 2023-03-10 v23.2.3.17-stable 2023-03-06 v23.2.2.20-stable 2023-03-01 v23.2.1.2537-stable 2023-02-23 From 3e7510a584fdd5d5e46e4020ecb1856cfb3710fe Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 10 Mar 2023 19:56:53 +0000 Subject: [PATCH 333/333] Fix 01429_join_on_error_messages --- tests/queries/0_stateless/01429_join_on_error_messages.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01429_join_on_error_messages.sql b/tests/queries/0_stateless/01429_join_on_error_messages.sql index 9b8688c8415..23aed83a66f 100644 --- a/tests/queries/0_stateless/01429_join_on_error_messages.sql +++ b/tests/queries/0_stateless/01429_join_on_error_messages.sql @@ -15,7 +15,7 @@ SET join_algorithm = 'partial_merge'; SELECT 1 FROM (select 1 a) A JOIN (select 1 b, 1 c) B ON a = b OR a = c; -- { serverError 48 } -- works for a = b OR a = b because of equivalent disjunct optimization -SET join_algorithm = 'auto'; +SET join_algorithm = 'grace_hash'; SELECT 1 FROM (select 1 a) A JOIN (select 1 b, 1 c) B ON a = b OR a = c; -- { serverError 48 } -- works for a = b OR a = b because of equivalent disjunct optimization