From 6934e27e8b94de95b3a76d096e7c7a7006a0ea34 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Fri, 20 Oct 2023 20:46:41 +0000
Subject: [PATCH 001/213] Add union mode for schema inference to infer union
 schema of files with different schemas

---
 src/Core/Settings.h                           |   7 +-
 src/Core/SettingsChangesHistory.h             |   3 +
 src/Core/SettingsEnums.cpp                    |   4 +
 src/Core/SettingsEnums.h                      |   8 +
 src/Formats/ReadSchemaUtils.cpp               | 108 +++++--
 src/Formats/ReadSchemaUtils.h                 |  24 +-
 src/Formats/SchemaInferenceUtils.cpp          |  58 ++++
 src/Formats/SchemaInferenceUtils.h            |  14 +-
 .../FunctionGenerateRandomStructure.cpp       |  12 +-
 src/Processors/Formats/ISchemaReader.cpp      |  14 +-
 src/Processors/Formats/ISchemaReader.h        |   5 +-
 .../Impl/JSONColumnsBlockInputFormatBase.cpp  |   5 +
 .../Impl/JSONColumnsBlockInputFormatBase.h    |   3 +-
 .../Impl/JSONCompactEachRowRowInputFormat.cpp |   5 +
 .../Impl/JSONCompactEachRowRowInputFormat.h   |   1 +
 .../Impl/JSONEachRowRowInputFormat.cpp        |   5 +
 .../Formats/Impl/JSONEachRowRowInputFormat.h  |   1 +
 .../Formats/Impl/MySQLDumpRowInputFormat.cpp  |   5 +
 .../Formats/Impl/MySQLDumpRowInputFormat.h    |   1 +
 .../Formats/Impl/RegexpRowInputFormat.h       |   1 -
 .../Formats/Impl/ValuesBlockInputFormat.cpp   |   5 +
 .../Formats/Impl/ValuesBlockInputFormat.h     |   1 +
 .../RowInputFormatWithNamesAndTypes.cpp       |   5 +
 .../Formats/RowInputFormatWithNamesAndTypes.h |   2 +
 src/Storages/Cache/SchemaCache.h              |   5 +-
 src/Storages/HDFS/StorageHDFS.cpp             | 154 +++++-----
 src/Storages/HDFS/StorageHDFS.h               |  13 -
 src/Storages/StorageAzureBlob.cpp             | 166 +++++-----
 src/Storages/StorageAzureBlob.h               |  15 -
 src/Storages/StorageFile.cpp                  | 285 ++++++++++--------
 src/Storages/StorageFile.h                    |  10 -
 src/Storages/StorageS3.cpp                    | 198 ++++++------
 src/Storages/StorageS3.h                      |  15 -
 src/Storages/StorageURL.cpp                   | 187 ++++++------
 src/Storages/StorageURL.h                     |  15 -
 .../StorageSystemSchemaInferenceCache.cpp     |   4 +-
 .../test_storage_azure_blob_storage/test.py   |  57 ++++
 tests/integration/test_storage_hdfs/test.py   |  49 +++
 tests/integration/test_storage_s3/test.py     |  62 ++++
 ...2900_union_schema_inference_mode.reference |  33 ++
 .../02900_union_schema_inference_mode.sh      |  57 ++++
 41 files changed, 1029 insertions(+), 593 deletions(-)
 create mode 100644 tests/queries/0_stateless/02900_union_schema_inference_mode.reference
 create mode 100755 tests/queries/0_stateless/02900_union_schema_inference_mode.sh

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 2c45108f9a5..e5b7c5ff30b 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -893,11 +893,11 @@ class IColumn;
     M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. Usually makes it much slower.", 0) \
     M(Bool, input_format_parquet_filter_push_down, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.", 0) \
     M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
-    M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
+    M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \
     M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \
-    M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \
+    M(Bool, input_format_parquet_allow_missing_columns, true, "Allow missing columns while reading Parquet input formats", 0) \
     M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \
-    M(Bool, input_format_arrow_allow_missing_columns, false, "Allow missing columns while reading Arrow input formats", 0) \
+    M(Bool, input_format_arrow_allow_missing_columns, true, "Allow missing columns while reading Arrow input formats", 0) \
     M(Char, input_format_hive_text_fields_delimiter, '\x01', "Delimiter between fields in Hive Text File", 0) \
     M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \
     M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \
@@ -925,6 +925,7 @@ class IColumn;
     M(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Arrow", 0) \
     M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
     M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \
+    M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be a union of schemas of all files", 0) \
     M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \
     M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
     M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index 38039839e1e..caa1b28b1c5 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -80,6 +80,9 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
 {
+    {"23.10", {{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
+              {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},
+              {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}},
     {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"},
               {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"},
               {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"},
diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp
index 82e158877c5..7f1162cc3ce 100644
--- a/src/Core/SettingsEnums.cpp
+++ b/src/Core/SettingsEnums.cpp
@@ -190,4 +190,8 @@ IMPLEMENT_SETTING_ENUM(ExternalCommandStderrReaction, ErrorCodes::BAD_ARGUMENTS,
      {"log_last", ExternalCommandStderrReaction::LOG_LAST},
      {"throw", ExternalCommandStderrReaction::THROW}})
 
+IMPLEMENT_SETTING_ENUM(SchemaInferenceMode, ErrorCodes::BAD_ARGUMENTS,
+    {{"default", SchemaInferenceMode::DEFAULT},
+     {"union", SchemaInferenceMode::UNION}})
+
 }
diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h
index 7db8c88c53d..af0c73e686a 100644
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@@ -242,4 +242,12 @@ DECLARE_SETTING_ENUM(S3QueueAction)
 
 DECLARE_SETTING_ENUM(ExternalCommandStderrReaction)
 
+enum class SchemaInferenceMode
+{
+    DEFAULT, /// Assume that all files have the same schema, so it can be inferred from any file.
+    UNION,   /// Files can have different schemas; the resulting schema is a union of schemas of all files.
+};
+
+DECLARE_SETTING_ENUM(SchemaInferenceMode)
+
 }
diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp
index b185007eda7..f2e831dfc46 100644
--- a/src/Formats/ReadSchemaUtils.cpp
+++ b/src/Formats/ReadSchemaUtils.cpp
@@ -1,12 +1,9 @@
-#include <DataTypes/DataTypeMap.h>
 #include <Formats/ReadSchemaUtils.h>
 #include <Interpreters/Context.h>
 #include <Processors/Formats/ISchemaReader.h>
-#include <Storages/IStorage.h>
 #include <Common/assert_cast.h>
-#include <IO/WithFileName.h>
 #include <IO/WithFileSize.h>
-
+#include <IO/EmptyReadBuffer.h>
 
 namespace DB
 {
@@ -55,6 +52,10 @@ ColumnsDescription readSchemaFromFormat(
 try
 {
     NamesAndTypesList names_and_types;
+    SchemaInferenceMode mode = context->getSettingsRef().schema_inference_mode;
+    if (mode == SchemaInferenceMode::UNION && !FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context, format_settings))
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION schema inference mode is not supported for format {}, because it doesn't support reading subset of columns", format_name);
+
     if (FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format_name))
     {
         auto external_schema_reader = FormatFactory::instance().getExternalSchemaReader(format_name, context, format_settings);
@@ -71,6 +72,11 @@ try
     }
     else if (FormatFactory::instance().checkIfFormatHasSchemaReader(format_name))
     {
+        if (mode == SchemaInferenceMode::UNION)
+            retry = false;
+
+        std::vector<std::pair<NamesAndTypesList, String>> schemas_for_union_mode;
+        std::optional<ColumnsDescription> cached_columns;
         std::string exception_messages;
         SchemaReaderPtr schema_reader;
         size_t max_rows_to_read = format_settings ? format_settings->max_rows_to_read_for_schema_inference
@@ -84,7 +90,15 @@ try
             try
             {
                 read_buffer_iterator.setPreviousReadBuffer(std::move(buf));
-                buf = read_buffer_iterator.next();
+                std::tie(buf, cached_columns) = read_buffer_iterator.next();
+                if (cached_columns)
+                {
+                    if (mode == SchemaInferenceMode::DEFAULT)
+                        return *cached_columns;
+                    schemas_for_union_mode.emplace_back(cached_columns->getAll(), read_buffer_iterator.getLastFileName());
+                    continue;
+                }
+
                 if (!buf)
                     break;
 
@@ -136,12 +150,19 @@ try
                 auto num_rows = schema_reader->readNumberOrRows();
                 if (num_rows)
                     read_buffer_iterator.setNumRowsToLastFile(*num_rows);
-                break;
+
+                /// In default mode, we finish when schema is inferred successfully from any file.
+                if (mode == SchemaInferenceMode::DEFAULT)
+                    break;
+
+                if (!names_and_types.empty())
+                    read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types));
+                schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName());
             }
             catch (...)
             {
                 auto exception_message = getCurrentExceptionMessage(false);
-                if (schema_reader)
+                if (schema_reader && mode == SchemaInferenceMode::DEFAULT)
                 {
                     size_t rows_read = schema_reader->getNumRowsRead();
                     assert(rows_read <= max_rows_to_read);
@@ -190,8 +211,58 @@ try
             }
         }
 
-        if (auto cached_columns = read_buffer_iterator.getCachedColumns())
-            return *cached_columns;
+        /// If we got all schemas from cache, schema_reader can be uninitialized.
+        /// But we still need some stateless methods of ISchemaReader,
+        /// let's initialize it with empty buffer.
+        EmptyReadBuffer empty;
+        if (!schema_reader)
+            schema_reader = FormatFactory::instance().getSchemaReader(format_name, empty, context, format_settings);
+
+        if (mode == SchemaInferenceMode::UNION)
+        {
+            Names names_order; /// Try to preserve the original order of columns.
+            std::unordered_map<String, DataTypePtr> names_to_types;
+
+
+            for (const auto & [schema, file_name] : schemas_for_union_mode)
+            {
+                for (const auto & [name, type] : schema)
+                {
+                    auto it = names_to_types.find(name);
+                    if (it == names_to_types.end())
+                    {
+                        names_order.push_back(name);
+                        names_to_types[name] = type;
+                    }
+                    else
+                    {
+                        /// We already have column with such name.
+                        /// Check if types are the same.
+                        if (!type->equals(*it->second))
+                        {
+                            /// If types are not the same, try to transform them according
+                            /// to the format to find common type.
+                            auto new_type_copy = type;
+                            schema_reader->transformTypesFromDifferentFilesIfNeeded(it->second, new_type_copy);
+
+                            /// If types are not the same after transform, we cannot do anything, throw an exception.
+                            if (!it->second->equals(*new_type_copy))
+                                throw Exception(
+                                    ErrorCodes::TYPE_MISMATCH,
+                                    "Automatically inferred type {} for column '{}'{} differs from type inferred from previous files: {}",
+                                    type->getName(),
+                                    name,
+                                    file_name.empty() ? "" : " in file " + file_name,
+                                    it->second->getName());
+                        }
+                    }
+                }
+            }
+
+            names_and_types.clear();
+            for (const auto & name : names_order)
+                names_and_types.emplace_back(name, names_to_types[name]);
+        }
 
         if (names_and_types.empty())
             throw Exception(
@@ -206,7 +277,7 @@ try
         /// It will allow to execute simple data loading with query
         /// "INSERT INTO table SELECT * FROM ..."
         const auto & insertion_table = context->getInsertionTable();
-        if (!schema_reader->hasStrictOrderOfColumns() && !insertion_table.empty())
+        if (schema_reader && !schema_reader->hasStrictOrderOfColumns() && !insertion_table.empty())
         {
             auto storage = DatabaseCatalog::instance().getTable(insertion_table, context);
             auto metadata = storage->getInMemoryMetadataPtr();
@@ -226,13 +297,15 @@ try
     names_and_types.erase(
         std::remove_if(names_and_types.begin(), names_and_types.end(), [](const NameAndTypePair & pair) { return pair.name.empty(); }),
         names_and_types.end());
-    return ColumnsDescription(names_and_types);
+
+    auto columns = ColumnsDescription(names_and_types);
+    if (mode == SchemaInferenceMode::DEFAULT)
+        read_buffer_iterator.setResultingSchema(columns);
+    return columns;
 }
 catch (Exception & e)
 {
-    if (!buf)
-        throw;
-    auto file_name = getFileNameFromReadBuffer(*buf);
+    auto file_name = read_buffer_iterator.getLastFileName();
     if (!file_name.empty())
         e.addMessage(fmt::format("(in file/uri {})", file_name));
     throw;
@@ -256,9 +329,9 @@ SchemaCache::Key getKeyForSchemaCache(
     return getKeysForSchemaCache({source}, format, format_settings, context).front();
 }
 
-static SchemaCache::Key makeSchemaCacheKey(const String & source, const String & format, const String & additional_format_info)
+static SchemaCache::Key makeSchemaCacheKey(const String & source, const String & format, const String & additional_format_info, const String & schema_inference_mode)
 {
-    return SchemaCache::Key{source, format, additional_format_info};
+    return SchemaCache::Key{source, format, additional_format_info, schema_inference_mode};
 }
 
 SchemaCache::Keys getKeysForSchemaCache(
@@ -270,13 +343,14 @@ SchemaCache::Keys getKeysForSchemaCache(
     /// For example, for Protobuf format additional information is the path to the schema
     /// and message name.
     String additional_format_info = FormatFactory::instance().getAdditionalInfoForSchemaCache(format, context, format_settings);
+    String schema_inference_mode(magic_enum::enum_name(context->getSettingsRef().schema_inference_mode.value));
     SchemaCache::Keys cache_keys;
     cache_keys.reserve(sources.size());
     std::transform(
         sources.begin(),
         sources.end(),
         std::back_inserter(cache_keys),
-        [&](const auto & source) { return makeSchemaCacheKey(source, format, additional_format_info); });
+        [&](const auto & source) { return makeSchemaCacheKey(source, format, additional_format_info, schema_inference_mode); });
     return cache_keys;
 }
 
diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h
index c769846acbb..aade6b28fb0 100644
--- a/src/Formats/ReadSchemaUtils.h
+++ b/src/Formats/ReadSchemaUtils.h
@@ -13,11 +13,23 @@ struct IReadBufferIterator
 
     virtual void setPreviousReadBuffer(std::unique_ptr<ReadBuffer> /* buffer */) {}
 
-    virtual std::unique_ptr<ReadBuffer> next() = 0;
-
-    virtual std::optional<ColumnsDescription> getCachedColumns() { return std::nullopt; }
+    /// Return read buffer of the next file or cached schema.
+    /// In DEFAULT schema inference mode cached schema can be from any file.
+    /// In UNION mode cached schema can be only from current file.
+    /// When there are no files left to process, return pair (nullptr, nullopt).
+    virtual std::pair<std::unique_ptr<ReadBuffer>, std::optional<ColumnsDescription>> next() = 0;
 
     virtual void setNumRowsToLastFile(size_t /*num_rows*/) {}
+
+    /// Set schema inferred from last file. Used for UNION mode to cache schema
+    /// per file.
+    virtual void setSchemaToLastFile(const ColumnsDescription & /*columns*/) {}
+    /// Set resulting inferred schema. Used for DEFAULT mode to cache schema
+    /// for all files.
+    virtual void setResultingSchema(const ColumnsDescription & /*columns*/) {}
+
+    /// Get last processed file name for better exception messages.
+    virtual String getLastFileName() const { return ""; }
 };
 
 struct SingleReadBufferIterator : public IReadBufferIterator
@@ -27,12 +39,12 @@ public:
     {
     }
 
-    std::unique_ptr<ReadBuffer> next() override
+    std::pair<std::unique_ptr<ReadBuffer>, std::optional<ColumnsDescription>> next() override
     {
         if (done)
-            return nullptr;
+            return {nullptr, {}};
         done = true;
-        return std::move(buf);
+        return {std::move(buf), {}};
     }
 
 private:
diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp
index 94166aa9002..13871904c56 100644
--- a/src/Formats/SchemaInferenceUtils.cpp
+++ b/src/Formats/SchemaInferenceUtils.cpp
@@ -547,6 +547,54 @@ namespace
         }
     }
 
+    /// Replace all named Tuples in data_types with a single Tuple that has the union of their elements.
+    void mergeNamedTuples(DataTypes & data_types, TypeIndexesSet & type_indexes, const FormatSettings & settings, JSONInferenceInfo * json_info)
+    {
+        if (!type_indexes.contains(TypeIndex::Tuple))
+            return;
+
+        /// Collect all names and their types from all named tuples.
+        std::unordered_map<String, DataTypes> names_to_types;
+        /// Try to save original order of element names.
+        Names element_names;
+        for (const auto & type : data_types)
+        {
+            const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get());
+            if (!tuple_type || !tuple_type->haveExplicitNames())
+                continue;
+
+            const auto & elements = tuple_type->getElements();
+            const auto & names = tuple_type->getElementNames();
+            for (size_t i = 0; i != elements.size(); ++i)
+            {
+                if (!names_to_types.contains(names[i]))
+                    element_names.push_back(names[i]);
+                names_to_types[names[i]].push_back(elements[i]);
+            }
+        }
+
+        /// Try to find common type for each tuple element with the same name.
+        DataTypes element_types;
+        element_types.reserve(element_names.size());
+        for (const auto & name : element_names)
+        {
+            auto & types = names_to_types[name]; /// Transformed in place: the map is not read again after this loop.
+            transformInferredTypesIfNeededImpl<true>(types, settings, json_info);
+            /// If some element has different types in different tuples, we can't do anything.
+            if (!checkIfTypesAreEqual(types))
+                return;
+            element_types.push_back(types.front());
+        }
+
+        DataTypePtr result_tuple = std::make_shared<DataTypeTuple>(element_types, element_names);
+        for (auto & type : data_types)
+        {
+            const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get());
+            if (tuple_type && tuple_type->haveExplicitNames())
+                type = result_tuple;
+        }
+    }
+
     template <bool is_json>
     void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info)
     {
@@ -604,6 +652,9 @@ namespace
 
             if (settings.json.read_objects_as_strings)
                 transformMapsAndStringsToStrings(data_types, type_indexes);
+
+            if (json_info && json_info->allow_merging_named_tuples)
+                mergeNamedTuples(data_types, type_indexes, settings, json_info);
         };
 
         transformTypesRecursively(types, transform_simple_types, transform_complex_types);
@@ -1180,6 +1231,13 @@ void transformInferredJSONTypesIfNeeded(
     second = std::move(types[1]);
 }
 
+void transformInferredJSONTypesFromDifferentFilesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings)
+{
+    JSONInferenceInfo json_info; /// Fresh inference info, local to this call.
+    json_info.allow_merging_named_tuples = true; /// Enables the named Tuple merging step of the JSON type transform.
+    transformInferredJSONTypesIfNeeded(first, second, settings, &json_info);
+}
+
 void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info, bool remain_nothing_types = false)
 {
     if (!data_type)
diff --git a/src/Formats/SchemaInferenceUtils.h b/src/Formats/SchemaInferenceUtils.h
index efeb6c9c873..b492d9b22b6 100644
--- a/src/Formats/SchemaInferenceUtils.h
+++ b/src/Formats/SchemaInferenceUtils.h
@@ -14,6 +14,11 @@ struct JSONInferenceInfo
     std::unordered_set<const IDataType *> numbers_parsed_from_json_strings;
     /// Indicates if currently we are inferring type for Map/Object key.
     bool is_object_key = false;
+    /// When we transform types for the same column from different files
+    /// we cannot use DataTypeJSONPaths for inferring named tuples from JSON objects,
+    /// because DataTypeJSONPaths was already finalized to named tuple. In this case
+    /// we can only merge named tuples from different files together.
+    bool allow_merging_named_tuples = false;
 };
 
 /// Try to determine datatype of the value in buffer/string. If the type cannot be inferred, return nullptr.
@@ -64,9 +69,7 @@ void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, c
 ///     from strings in json_info while inference and use it here, so we will know that Array(Int64) contains
 ///     integer inferred from a string.
 /// Example 2:
-///     When we have maps with different value types, we convert all types to JSON object type.
-///     For example, if we have Map(String, UInt64) (like `{"a" : 123}`) and Map(String, String) (like `{"b" : 'abc'}`)
-///     we will convert both types to Object('JSON').
+///     We merge DataTypeJSONPaths types to a single DataTypeJSONPaths type with union of all JSON paths.
 void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, JSONInferenceInfo * json_info);
 
 /// Make final transform for types inferred in JSON format. It does 3 types of transformation:
@@ -78,6 +81,11 @@ void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & secon
 /// 3) Converts all Nothing types to String types if input_format_json_infer_incomplete_types_as_strings is enabled.
 void transformFinalInferredJSONTypeIfNeeded(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info);
 
+/// Transform types for the same column inferred from different files.
+/// Does the same as transformInferredJSONTypesIfNeeded, but also merges named Tuples together,
+/// because DataTypeJSONPaths types were finalized when we finished inference for a file.
+void transformInferredJSONTypesFromDifferentFilesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings);
+
 /// Make type Nullable recursively:
 /// - Type -> Nullable(type)
 /// - Array(Type) -> Array(Nullable(Type))
diff --git a/src/Functions/FunctionGenerateRandomStructure.cpp b/src/Functions/FunctionGenerateRandomStructure.cpp
index f85b2596530..8e086f075a4 100644
--- a/src/Functions/FunctionGenerateRandomStructure.cpp
+++ b/src/Functions/FunctionGenerateRandomStructure.cpp
@@ -34,7 +34,7 @@ namespace
     const size_t MAX_DECIMAL256_PRECISION = 76;
     const size_t MAX_DEPTH = 16;
 
-    constexpr std::array<TypeIndex, 29> simple_types
+    constexpr std::array<TypeIndex, 28> simple_types
     {
         TypeIndex::Int8,
         TypeIndex::UInt8,
@@ -64,7 +64,7 @@ namespace
         TypeIndex::Enum16,
         TypeIndex::IPv4,
         TypeIndex::IPv6,
-        TypeIndex::UUID,
+//        TypeIndex::UUID,
     };
 
     constexpr std::array<TypeIndex, 5> complex_types
@@ -76,7 +76,7 @@ namespace
         TypeIndex::Map,
     };
 
-    constexpr std::array<TypeIndex, 22> map_key_types
+    constexpr std::array<TypeIndex, 21> map_key_types
     {
         TypeIndex::Int8,
         TypeIndex::UInt8,
@@ -98,11 +98,11 @@ namespace
         TypeIndex::IPv4,
         TypeIndex::Enum8,
         TypeIndex::Enum16,
-        TypeIndex::UUID,
+//        TypeIndex::UUID,
         TypeIndex::LowCardinality,
     };
 
-    constexpr std::array<TypeIndex, 22> suspicious_lc_types
+    constexpr std::array<TypeIndex, 21> suspicious_lc_types
     {
         TypeIndex::Int8,
         TypeIndex::UInt8,
@@ -125,7 +125,7 @@ namespace
         TypeIndex::FixedString,
         TypeIndex::IPv4,
         TypeIndex::IPv6,
-        TypeIndex::UUID,
+//        TypeIndex::UUID,
     };
 
     template <bool allow_complex_types>
diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp
index 15b53c2a499..26c632b83dc 100644
--- a/src/Processors/Formats/ISchemaReader.cpp
+++ b/src/Processors/Formats/ISchemaReader.cpp
@@ -2,6 +2,7 @@
 #include <Formats/SchemaInferenceUtils.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/getLeastSupertype.h>
 #include <Common/logger_useful.h>
 #include <Interpreters/parseColumnsListForTableFunction.h>
 #include <boost/algorithm/string.hpp>
@@ -62,6 +63,14 @@ void checkFinalInferredType(
         type = removeNullable(type);
 }
 
+/// Default unification of two inferred types: if a least supertype exists, both become it; otherwise both stay as is.
+void ISchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
+{
+    const DataTypes candidates{type, new_type};
+    if (auto common_type = tryGetLeastSupertype(candidates))
+        type = new_type = common_type;
+}
+
 IIRowSchemaReader::IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_)
     : ISchemaReader(in_)
     , max_rows_to_read(format_settings_.max_rows_to_read_for_schema_inference)
@@ -86,11 +95,6 @@ void IIRowSchemaReader::setContext(ContextPtr & context)
     }
 }
 
-void IIRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
-{
-    transformInferredTypesIfNeeded(type, new_type, format_settings);
-}
-
 IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
     : IIRowSchemaReader(in_, format_settings_), column_names(splitColumnNames(format_settings.column_names_for_schema_inference))
 {
diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h
index e6402ac0249..94df71a88b4 100644
--- a/src/Processors/Formats/ISchemaReader.h
+++ b/src/Processors/Formats/ISchemaReader.h
@@ -39,6 +39,9 @@ public:
     virtual void setMaxRowsAndBytesToRead(size_t, size_t) {}
     virtual size_t getNumRowsRead() const { return 0; }
 
+    virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type);
+    virtual void transformTypesFromDifferentFilesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { transformTypesIfNeeded(type, new_type); }
+
     virtual ~ISchemaReader() = default;
 
 protected:
@@ -55,8 +58,6 @@ public:
     bool needContext() const override { return !hints_str.empty(); }
     void setContext(ContextPtr & context) override;
 
-    virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type);
-
 protected:
     void setMaxRowsAndBytesToRead(size_t max_rows, size_t max_bytes) override
     {
diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp
index 26bd0847fb7..1c148f5b3d3 100644
--- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp
+++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp
@@ -230,6 +230,11 @@ void JSONColumnsSchemaReaderBase::transformTypesIfNeeded(DataTypePtr & type, Dat
     transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info);
 }
 
+void JSONColumnsSchemaReaderBase::transformTypesFromDifferentFilesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
+{
+    transformInferredJSONTypesFromDifferentFilesIfNeeded(type, new_type, format_settings); /// JSON transform that also merges named Tuples.
+}
+
 NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema()
 {
     std::unordered_map<String, DataTypePtr> names_to_types;
diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h
index bb52e2aa516..53d65bb3539 100644
--- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h
+++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h
@@ -80,7 +80,8 @@ class JSONColumnsSchemaReaderBase : public ISchemaReader
 public:
     JSONColumnsSchemaReaderBase(ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr<JSONColumnsReaderBase> reader_);
 
-    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type);
+    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
+    void transformTypesFromDifferentFilesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
 
     bool needContext() const override { return !hints_str.empty(); }
     void setContext(ContextPtr & ctx) override;
diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
index 99186d0eb6d..b301b9527c1 100644
--- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
@@ -228,6 +228,11 @@ void JSONCompactEachRowRowSchemaReader::transformTypesIfNeeded(DataTypePtr & typ
     transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info);
 }
 
+void JSONCompactEachRowRowSchemaReader::transformTypesFromDifferentFilesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
+{
+    transformInferredJSONTypesFromDifferentFilesIfNeeded(type, new_type, format_settings); /// Variant for types coming from different files: unlike transformTypesIfNeeded, no inference_info is passed.
+}
+
 void JSONCompactEachRowRowSchemaReader::transformFinalTypeIfNeeded(DataTypePtr & type)
 {
     transformFinalInferredJSONTypeIfNeeded(type, format_settings, &inference_info);
diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h
index 2e255a55d57..463d3c53a65 100644
--- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h
@@ -92,6 +92,7 @@ private:
     std::optional<DataTypes> readRowAndGetDataTypesImpl() override;
 
     void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
+    void transformTypesFromDifferentFilesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
     void transformFinalTypeIfNeeded(DataTypePtr & type) override;
 
     JSONCompactEachRowFormatReader reader;
diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
index 30b08cd1d9c..95563fd2f62 100644
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
@@ -365,6 +365,11 @@ void JSONEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTyp
     transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info);
 }
 
+void JSONEachRowSchemaReader::transformTypesFromDifferentFilesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type)
+{
+    transformInferredJSONTypesFromDifferentFilesIfNeeded(type, new_type, format_settings); /// Variant for types coming from different files: unlike transformTypesIfNeeded, no inference_info is passed.
+}
+
 void JSONEachRowSchemaReader::transformFinalTypeIfNeeded(DataTypePtr & type)
 {
     transformFinalInferredJSONTypeIfNeeded(type, format_settings, &inference_info);
diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
index ad494d07fbc..3ff1b6d317c 100644
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
@@ -104,6 +104,7 @@ public:
 private:
     NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override;
     void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
+    void transformTypesFromDifferentFilesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
     void transformFinalTypeIfNeeded(DataTypePtr & type) override;
 
     bool first_row = true;
diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp
index 6ae32aa5842..7e8b4accf4d 100644
--- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp
@@ -462,6 +462,11 @@ std::optional<DataTypes> MySQLDumpSchemaReader::readRowAndGetDataTypes()
     return data_types;
 }
 
+void MySQLDumpSchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type)
+{
+    transformInferredTypesIfNeeded(type, new_type, format_settings); /// Plain forward of both types and format_settings to transformInferredTypesIfNeeded.
+}
+
 void registerInputFormatMySQLDump(FormatFactory & factory)
 {
     factory.registerInputFormat("MySQLDump", [](
diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h
index 4148b6e79a3..18b403b5f9a 100644
--- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h
+++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h
@@ -37,6 +37,7 @@ public:
 private:
     NamesAndTypesList readSchema() override;
     std::optional<DataTypes> readRowAndGetDataTypes() override;
+    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
 
     String table_name;
 };
diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h
index e4a34f3c4f2..c3b6290281a 100644
--- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h
+++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h
@@ -90,7 +90,6 @@ private:
 
     void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
 
-
     using EscapingRule = FormatSettings::EscapingRule;
     RegexpFieldExtractor field_extractor;
     PeekableReadBuffer buf;
diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
index b0ee2f7797a..d55ccce8879 100644
--- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
@@ -701,6 +701,11 @@ std::optional<DataTypes> ValuesSchemaReader::readRowAndGetDataTypes()
     return data_types;
 }
 
+void ValuesSchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type)
+{
+    transformInferredTypesIfNeeded(type, new_type, format_settings); /// Plain forward of both types and format_settings to transformInferredTypesIfNeeded.
+}
+
 void registerInputFormatValues(FormatFactory & factory)
 {
     factory.registerInputFormat("Values", [](
diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
index e8c3b555994..643213695a2 100644
--- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
@@ -111,6 +111,7 @@ public:
 
 private:
     std::optional<DataTypes> readRowAndGetDataTypes() override;
+    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
 
     PeekableReadBuffer buf;
     ParserExpression parser;
diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
index a6514257dd3..ffee2bea1cc 100644
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
@@ -561,5 +561,10 @@ std::vector<String> FormatWithNamesAndTypesSchemaReader::readNamesFromFields(con
     return names;
 }
 
+void FormatWithNamesAndTypesSchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type)
+{
+    transformInferredTypesIfNeeded(type, new_type, format_settings); /// Plain forward of both types and format_settings to transformInferredTypesIfNeeded.
+}
+
 }
 
diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
index c263b3b9666..d0457990714 100644
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
@@ -172,6 +172,8 @@ public:
 
     NamesAndTypesList readSchema() override;
 
+    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
+
 protected:
     virtual std::optional<DataTypes> readRowAndGetDataTypes() override;
 
diff --git a/src/Storages/Cache/SchemaCache.h b/src/Storages/Cache/SchemaCache.h
index 6f1ce917852..1bfc18bddab 100644
--- a/src/Storages/Cache/SchemaCache.h
+++ b/src/Storages/Cache/SchemaCache.h
@@ -29,10 +29,11 @@ public:
         String source;
         String format;
         String additional_format_info;
+        String schema_inference_mode;
 
         bool operator==(const Key & other) const
         {
-            return source == other.source && format == other.format && additional_format_info == other.additional_format_info;
+            return source == other.source && format == other.format && additional_format_info == other.additional_format_info && schema_inference_mode == other.schema_inference_mode; /// Keys that differ only in schema_inference_mode compare unequal.
         }
     };
 
@@ -42,7 +43,7 @@ public:
     {
         size_t operator()(const Key & key) const
         {
-            return std::hash<String>()(key.source + key.format + key.additional_format_info);
+            return std::hash<String>()(key.source + key.format + key.additional_format_info + key.schema_inference_mode); /// Hashes the concatenation of the same four fields that Key::operator== compares.
         }
     };
 
diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 99c3e6e29cf..75330ed7db1 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -302,10 +302,17 @@ namespace
         {
         }
 
-        std::unique_ptr<ReadBuffer> next() override
+        std::pair<std::unique_ptr<ReadBuffer>, std::optional<ColumnsDescription>> next() override
         {
-            StorageHDFS::PathWithInfo path_with_info;
             bool is_first = current_index == 0;
+            /// For default mode check cached columns for all paths on first iteration.
+            if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT)
+            {
+                if (auto cached_columns = tryGetColumnsFromCache(paths_with_info))
+                    return {nullptr, cached_columns};
+            }
+
+            StorageHDFS::PathWithInfo path_with_info;
 
             while (true)
             {
@@ -315,26 +322,33 @@ namespace
                         throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
                                         "Cannot extract table structure from {} format file, because all files are empty. "
                                         "You must specify table structure manually", format);
-                    return nullptr;
+                    return {nullptr, std::nullopt};
                 }
 
                 path_with_info = paths_with_info[current_index++];
                 if (getContext()->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0)
                     continue;
 
+                if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION)
+                {
+                    std::vector<StorageHDFS::PathWithInfo> paths = {path_with_info};
+                    if (auto cached_columns = tryGetColumnsFromCache(paths))
+                        return {nullptr, cached_columns};
+                }
+
                 auto compression = chooseCompressionMethod(path_with_info.path, compression_method);
                 auto impl = std::make_unique<ReadBufferFromHDFS>(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings());
                 if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof())
                 {
                     const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max;
-                    return wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast<int>(zstd_window_log_max));
+                    return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast<int>(zstd_window_log_max)), std::nullopt};
                 }
             }
         }
 
         void setNumRowsToLastFile(size_t num_rows) override
         {
-            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3)
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs)
                 return;
 
             String source = uri_without_path + paths_with_info[current_index - 1].path;
@@ -342,7 +356,68 @@ namespace
             StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows);
         }
 
+        /// Union mode with the HDFS schema cache enabled: store the given schema under the key of the path at current_index - 1.
+        void setSchemaToLastFile(const ColumnsDescription & columns) override
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION)
+                return;
+
+            String last_file_source = uri_without_path + paths_with_info[current_index - 1].path;
+            auto last_file_key = getKeyForSchemaCache(last_file_source, format, std::nullopt, getContext());
+            StorageHDFS::getSchemaCache(getContext()).addColumns(last_file_key, columns);
+        }
+
+        /// Default mode with the HDFS schema cache enabled: store the resulting schema under the key of every path in paths_with_info.
+        void setResultingSchema(const ColumnsDescription & columns) override
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT)
+                return;
+            Strings all_sources;
+            all_sources.reserve(paths_with_info.size());
+            for (const auto & entry : paths_with_info)
+                all_sources.push_back(uri_without_path + entry.path);
+            auto schema_keys = getKeysForSchemaCache(all_sources, format, {}, getContext());
+            StorageHDFS::getSchemaCache(getContext()).addManyColumns(schema_keys, columns);
+        }
+
+        /// Path stored at index current_index - 1 (the entry next() consumed last); empty string while current_index is still 0.
+        String getLastFileName() const override
+        {
+            if (current_index == 0)
+                return "";
+            return paths_with_info[current_index - 1].path;
+        }
+
     private:
+        /// Returns the first schema found in the cache for any of the given paths, or std::nullopt if the HDFS schema cache is disabled or there is no hit.
+        std::optional<ColumnsDescription> tryGetColumnsFromCache(const std::vector<StorageHDFS::PathWithInfo> & paths_with_info_)
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) /// next() calls this unconditionally, so the setting is honoured here.
+                return std::nullopt;
+
+            auto & schema_cache = StorageHDFS::getSchemaCache(getContext());
+            for (const auto & path_with_info : paths_with_info_)
+            {
+                auto get_last_mod_time = [&]() -> std::optional<time_t>
+                {
+                    if (path_with_info.info)
+                        return path_with_info.info->last_mod_time;
+                    auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); /// No stored info for this path: query HDFS instead.
+                    auto fs = createHDFSFS(builder.get());
+                    HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str()));
+                    if (hdfs_info)
+                        return hdfs_info->mLastMod;
+                    return std::nullopt;
+                };
+
+                String url = uri_without_path + path_with_info.path;
+                auto cache_key = getKeyForSchemaCache(url, format, {}, getContext());
+                if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time))
+                    return columns;
+            }
+            return std::nullopt;
+        }
+
         const std::vector<StorageHDFS::PathWithInfo> & paths_with_info;
         const String & uri_without_path;
         const String & format;
@@ -366,25 +441,8 @@ ColumnsDescription StorageHDFS::getTableStructureFromData(
             "Cannot extract table structure from {} format file, because there are no files in HDFS with provided path."
             " You must specify table structure manually", format);
 
-    std::optional<ColumnsDescription> columns_from_cache;
-    if (ctx->getSettingsRef().schema_inference_use_cache_for_hdfs)
-        columns_from_cache = tryGetColumnsFromCache(paths_with_info, uri_without_path, format, ctx);
-
-    ColumnsDescription columns;
-    if (columns_from_cache)
-    {
-        columns = *columns_from_cache;
-    }
-    else
-    {
-        ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx);
-        columns = readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, paths_with_info.size() > 1, ctx);
-    }
-
-    if (ctx->getSettingsRef().schema_inference_use_cache_for_hdfs)
-        addColumnsToCache(paths_with_info, uri_without_path, columns, format, ctx);
-
-    return columns;
+    ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx);
+    return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, paths_with_info.size() > 1, ctx);
 }
 
 class HDFSSource::DisclosedGlobIterator::Impl
@@ -1017,54 +1075,6 @@ SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx)
     return schema_cache;
 }
 
-std::optional<ColumnsDescription> StorageHDFS::tryGetColumnsFromCache(
-    const std::vector<StorageHDFS::PathWithInfo> & paths_with_info,
-    const String & uri_without_path,
-    const String & format_name,
-    const ContextPtr & ctx)
-{
-    auto & schema_cache = getSchemaCache(ctx);
-    for (const auto & path_with_info : paths_with_info)
-    {
-        auto get_last_mod_time = [&]() -> std::optional<time_t>
-        {
-            if (path_with_info.info)
-                return path_with_info.info->last_mod_time;
-
-            auto builder = createHDFSBuilder(uri_without_path + "/", ctx->getGlobalContext()->getConfigRef());
-            auto fs = createHDFSFS(builder.get());
-            HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str()));
-            if (hdfs_info)
-                return hdfs_info->mLastMod;
-
-            return std::nullopt;
-        };
-
-        String url = uri_without_path + path_with_info.path;
-        auto cache_key = getKeyForSchemaCache(url, format_name, {}, ctx);
-        auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time);
-        if (columns)
-            return columns;
-    }
-
-    return std::nullopt;
-}
-
-void StorageHDFS::addColumnsToCache(
-    const std::vector<StorageHDFS::PathWithInfo> & paths_with_info,
-    const String & uri_without_path,
-    const ColumnsDescription & columns,
-    const String & format_name,
-    const ContextPtr & ctx)
-{
-    auto & schema_cache = getSchemaCache(ctx);
-    Strings sources;
-    sources.reserve(paths_with_info.size());
-    std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; });
-    auto cache_keys = getKeysForSchemaCache(sources, format_name, {}, ctx);
-    schema_cache.addManyColumns(cache_keys, columns);
-}
-
 }
 
 #endif
diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h
index ffbf4e93ff9..babcab3ed16 100644
--- a/src/Storages/HDFS/StorageHDFS.h
+++ b/src/Storages/HDFS/StorageHDFS.h
@@ -94,19 +94,6 @@ protected:
     friend class HDFSSource;
 
 private:
-    static std::optional<ColumnsDescription> tryGetColumnsFromCache(
-        const std::vector<StorageHDFS::PathWithInfo> & paths_with_info,
-        const String & uri_without_path,
-        const String & format_name,
-        const ContextPtr & ctx);
-
-    static void addColumnsToCache(
-        const std::vector<StorageHDFS::PathWithInfo> & paths,
-        const String & uri_without_path,
-        const ColumnsDescription & columns,
-        const String & format_name,
-        const ContextPtr & ctx);
-
     std::vector<String> uris;
     String format_name;
     String compression_method;
diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index a4a686b2691..796c732ba3f 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -1215,11 +1215,18 @@ namespace
         {
         }
 
-        std::unique_ptr<ReadBuffer> next() override
+        std::pair<std::unique_ptr<ReadBuffer>, std::optional<ColumnsDescription>> next() override
         {
-            auto [key, metadata] = file_iterator->next();
+            /// For default mode check cached columns for currently read keys on first iteration.
+            if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT)
+            {
+                if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end()))
+                    return {nullptr, cached_columns};
+            }
 
-            if (key.empty())
+            current_path_with_metadata = file_iterator->next();
+
+            if (current_path_with_metadata.relative_path.empty())
             {
                 if (first)
                     throw Exception(
@@ -1227,49 +1234,102 @@ namespace
                         "Cannot extract table structure from {} format file, because there are no files with provided path "
                         "in AzureBlobStorage. You must specify table structure manually", configuration.format);
 
-                return nullptr;
+                return {nullptr, std::nullopt};
             }
 
-            current_path = key;
+            first = false;
 
-            ///AzureBlobStorage file iterator could get new keys after new iteration, check them in schema cache.
-            if (getContext()->getSettingsRef().schema_inference_use_cache_for_azure && read_keys.size() > prev_read_keys_size)
+            /// AzureBlobStorage file iterator could get new keys after new iteration, check them in schema cache if schema inference mode is default.
+            if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT && read_keys.size() > prev_read_keys_size)
             {
-                columns_from_cache = StorageAzureBlob::tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end(), configuration, format_settings, getContext());
+                auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end());
                 prev_read_keys_size = read_keys.size();
                 if (columns_from_cache)
-                    return nullptr;
+                    return {nullptr, columns_from_cache};
+            }
+            else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION)
+            {
+                RelativePathsWithMetadata paths = {current_path_with_metadata};
+                if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end()))
+                    return {nullptr, columns_from_cache};
             }
 
             first = false;
             int zstd_window_log_max = static_cast<int>(getContext()->getSettingsRef().zstd_window_log_max);
-            return wrapReadBufferWithCompressionMethod(
-                object_storage->readObject(StoredObject(key), getContext()->getReadSettings(), {}, metadata.size_bytes),
-                chooseCompressionMethod(key, configuration.compression_method),
-                zstd_window_log_max);
+            return {wrapReadBufferWithCompressionMethod(
+                object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes),
+                chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method),
+                zstd_window_log_max), std::nullopt};
         }
 
-        std::optional<ColumnsDescription> getCachedColumns() override { return columns_from_cache; }
-
         void setNumRowsToLastFile(size_t num_rows) override
         {
-            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3)
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) /// Row counts are written to the cache only when the Azure schema cache setting is enabled.
                 return;
 
-            String source = fs::path(configuration.connection_url) / configuration.container / current_path;
+            String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path;
             auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext());
             StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows);
         }
 
+        /// Union mode with the Azure schema cache enabled: store the given schema under the key built from current_path_with_metadata.
+        void setSchemaToLastFile(const ColumnsDescription & columns) override
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION)
+                return;
+
+            String last_blob_source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path;
+            auto last_blob_key = getKeyForSchemaCache(last_blob_source, configuration.format, format_settings, getContext());
+            StorageAzureBlob::getSchemaCache(getContext()).addColumns(last_blob_key, columns);
+        }
+
+        /// Default mode only: in union mode files may have different schemas, so the combined result must not be cached for every key in read_keys.
+        void setResultingSchema(const ColumnsDescription & columns) override
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT)
+                return;
+
+            auto host_and_bucket = configuration.connection_url + '/' + configuration.container;
+            Strings sources;
+            sources.reserve(read_keys.size());
+            std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; });
+            auto cache_keys = getKeysForSchemaCache(sources, configuration.format, format_settings, getContext());
+            StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns);
+        }
+
+        String getLastFileName() const override { return current_path_with_metadata.relative_path; } /// Relative path of the entry last obtained from file_iterator->next().
+
     private:
+        /// Returns the first schema found in the cache for any key in [begin, end), or std::nullopt if the Azure schema cache is disabled or there is no hit.
+        std::optional<ColumnsDescription> tryGetColumnsFromCache(const RelativePathsWithMetadata::const_iterator & begin, const RelativePathsWithMetadata::const_iterator & end)
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) /// next() does not check the setting itself, so it is honoured here.
+                return std::nullopt;
+
+            auto & schema_cache = StorageAzureBlob::getSchemaCache(getContext());
+            for (auto it = begin; it < end; ++it)
+            {
+                auto get_last_mod_time = [&] -> std::optional<time_t>
+                {
+                    if (it->metadata.last_modified)
+                        return it->metadata.last_modified->epochTime();
+                    return std::nullopt;
+                };
+                String source = configuration.connection_url + '/' + configuration.container + '/' + it->relative_path;
+                auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext());
+                if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time))
+                    return columns;
+            }
+            return std::nullopt;
+        }
+
         std::shared_ptr<StorageAzureBlobSource::IIterator> file_iterator;
         AzureObjectStorage * object_storage;
         const StorageAzureBlob::Configuration & configuration;
         const std::optional<FormatSettings> & format_settings;
         const RelativePathsWithMetadata & read_keys;
-        std::optional<ColumnsDescription> columns_from_cache;
         size_t prev_read_keys_size;
-        String current_path;
+        RelativePathWithMetadata current_path_with_metadata;
         bool first = true;
     };
 }
@@ -1299,72 +1359,8 @@ ColumnsDescription StorageAzureBlob::getTableStructureFromData(
             object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys);
     }
 
-    std::optional<ColumnsDescription> columns_from_cache;
-    if (ctx->getSettingsRef().schema_inference_use_cache_for_azure)
-        columns_from_cache = tryGetColumnsFromCache(read_keys.begin(), read_keys.end(), configuration, format_settings, ctx);
-
-    ColumnsDescription columns;
-    if (columns_from_cache)
-    {
-        columns = *columns_from_cache;
-    }
-    else
-    {
-        ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, configuration, format_settings, read_keys, ctx);
-        columns = readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx);
-    }
-
-    if (ctx->getSettingsRef().schema_inference_use_cache_for_azure)
-        addColumnsToCache(read_keys, columns, configuration, format_settings, configuration.format, ctx);
-
-    return columns;
-
-}
-
-std::optional<ColumnsDescription> StorageAzureBlob::tryGetColumnsFromCache(
-        const RelativePathsWithMetadata::const_iterator & begin,
-        const RelativePathsWithMetadata::const_iterator & end,
-        const StorageAzureBlob::Configuration & configuration,
-        const std::optional<FormatSettings> & format_settings,
-        const ContextPtr & ctx)
-{
-    auto & schema_cache = getSchemaCache(ctx);
-    for (auto it = begin; it < end; ++it)
-    {
-        auto get_last_mod_time = [&] -> std::optional<time_t>
-        {
-            if (it->metadata.last_modified)
-                return it->metadata.last_modified->epochTime();
-            return std::nullopt;
-        };
-
-        auto host_and_bucket = configuration.connection_url + '/' + configuration.container;
-        String source = host_and_bucket + '/' + it->relative_path;
-        auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, ctx);
-        auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time);
-        if (columns)
-            return columns;
-    }
-
-    return std::nullopt;
-
-}
-
-void StorageAzureBlob::addColumnsToCache(
-    const RelativePathsWithMetadata & keys,
-    const ColumnsDescription & columns,
-    const StorageAzureBlob::Configuration & configuration,
-    const std::optional<FormatSettings> & format_settings,
-    const String & format_name,
-    const ContextPtr & ctx)
-{
-    auto host_and_bucket = configuration.connection_url + '/' + configuration.container;
-    Strings sources;
-    sources.reserve(keys.size());
-    std::transform(keys.begin(), keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; });
-    auto cache_keys = getKeysForSchemaCache(sources, format_name, format_settings, ctx);
-    auto & schema_cache = getSchemaCache(ctx);
-    schema_cache.addManyColumns(cache_keys, columns);
+    ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, configuration, format_settings, read_keys, ctx);
+    return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx);
 }
 
 SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx)
diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h
index b97dee0caed..0862e8cc2bf 100644
--- a/src/Storages/StorageAzureBlob.h
+++ b/src/Storages/StorageAzureBlob.h
@@ -124,21 +124,6 @@ public:
         ContextPtr ctx,
         bool distributed_processing = false);
 
-    static std::optional<ColumnsDescription> tryGetColumnsFromCache(
-        const RelativePathsWithMetadata::const_iterator & begin,
-        const RelativePathsWithMetadata::const_iterator & end,
-        const StorageAzureBlob::Configuration & configuration,
-        const std::optional<FormatSettings> & format_settings,
-        const ContextPtr & ctx);
-
-    static void addColumnsToCache(
-        const RelativePathsWithMetadata & keys,
-        const ColumnsDescription & columns,
-        const Configuration & configuration,
-        const std::optional<FormatSettings> & format_settings,
-        const String & format_name,
-        const ContextPtr & ctx);
-
 private:
     std::string name;
     Configuration configuration;
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 856c1f21d27..1f4d14218aa 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -444,11 +444,19 @@ namespace
         {
         }
 
-        std::unique_ptr<ReadBuffer> next() override
+        std::pair<std::unique_ptr<ReadBuffer>, std::optional<ColumnsDescription>> next() override
         {
+            bool is_first = current_index == 0;
+            /// For default mode check cached columns for all paths on first iteration.
+            /// If we have cached columns, next() won't be called again.
+            if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT)
+            {
+                if (auto cached_columns = tryGetColumnsFromCache(paths))
+                    return {nullptr, cached_columns};
+            }
+
             String path;
             struct stat file_stat;
-            bool is_first = current_index == 0;
 
             do
             {
@@ -459,14 +467,21 @@ namespace
                             ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
                             "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually",
                             format);
-                    return nullptr;
+                    return {nullptr, std::nullopt};
                 }
 
                 path = paths[current_index++];
                 file_stat = getFileStat(path, false, -1, "File");
             } while (getContext()->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0);
 
-            return createReadBuffer(path, file_stat, false, -1, compression_method, getContext());
+            /// For union mode, check cached columns only for current path, because schema can be different for different files.
+            if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION)
+            {
+                if (auto cached_columns = tryGetColumnsFromCache({path}))
+                    return {nullptr, cached_columns};
+            }
+
+            return {createReadBuffer(path, file_stat, false, -1, compression_method, getContext()), std::nullopt};
         }
 
         void setNumRowsToLastFile(size_t num_rows) override
@@ -478,7 +493,64 @@ namespace
             StorageFile::getSchemaCache(getContext()).addNumRows(key, num_rows);
         }
 
+        void setSchemaToLastFile(const ColumnsDescription & columns) override /// Stores `columns` in the schema cache for the last processed path (UNION mode only).
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_file
+                || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION)
+                return; /// The file schema cache is disabled or the mode is not UNION: nothing to store.
+
+            /// In union mode the schema can differ from file to file, so the last inferred
+            /// schema is cached only under the key of the last processed file.
+            auto cache_key = getKeyForSchemaCache(paths[current_index - 1], format, format_settings, getContext()); /// next() post-increments current_index, so index - 1 is the last path it took.
+            StorageFile::getSchemaCache(getContext()).addColumns(cache_key, columns);
+        }
+
+        void setResultingSchema(const ColumnsDescription & columns) override /// Stores `columns` in the schema cache for every path in `paths` (DEFAULT mode only).
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_file
+                || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT)
+                return; /// The file schema cache is disabled or the mode is not DEFAULT: nothing to store.
+
+            /// In default mode the resulting schema is cached under the key of each path.
+            auto cache_keys = getKeysForSchemaCache(paths, format, format_settings, getContext());
+            StorageFile::getSchemaCache(getContext()).addManyColumns(cache_keys, columns);
+        }
+
+        String getLastFileName() const override /// Returns the path most recently taken from `paths`, or an empty string if none was taken yet.
+        {
+            if (current_index != 0)
+                return paths[current_index - 1]; /// current_index points one past the last consumed path.
+            return "";
+        }
+
     private:
+        std::optional<ColumnsDescription> tryGetColumnsFromCache(const Strings & paths_) /// Returns the first cached schema found for any of `paths_`, or std::nullopt.
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_file)
+                return std::nullopt; /// The schema cache for files is disabled by the setting.
+
+            /// Look the paths up one by one and stop at the first one that has cached columns.
+            auto & schema_cache = StorageFile::getSchemaCache(getContext());
+            struct stat file_stat{};
+            for (const auto & path : paths_)
+            {
+                auto get_last_mod_time = [&]() -> std::optional<time_t> /// Callback handed to the cache lookup below.
+                {
+                    if (0 != stat(path.c_str(), &file_stat))
+                        return std::nullopt; /// stat() failed, so no modification time is available.
+
+                    return file_stat.st_mtime;
+                };
+
+                auto cache_key = getKeyForSchemaCache(path, format, format_settings, getContext());
+                auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time);
+                if (columns)
+                    return columns;
+            }
+
+            return std::nullopt; /// None of the paths produced cached columns.
+        }
+
         const std::vector<String> & paths;
 
         size_t current_index = 0;
@@ -502,8 +574,19 @@ namespace
         {
         }
 
-        std::unique_ptr<ReadBuffer> next() override
+        std::pair<std::unique_ptr<ReadBuffer>, std::optional<ColumnsDescription>> next() override
         {
+            /// For default mode check cached columns for all initial archive paths (maybe with globs) on first iteration.
+            /// If we have cached columns, next() won't be called again.
+            if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT)
+            {
+                for (const auto & archive : archive_info.paths_to_archives)
+                {
+                    if (auto cached_columns = tryGetColumnsFromSchemaCache(archive, archive_info.path_in_archive))
+                        return {nullptr, cached_columns};
+                }
+            }
+
             std::unique_ptr<ReadBuffer> read_buf;
             while (true)
             {
@@ -515,7 +598,7 @@ namespace
                             "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually",
                             format);
 
-                    return nullptr;
+                    return {nullptr, std::nullopt};
                 }
 
                 const auto & archive = archive_info.paths_to_archives[current_archive_index];
@@ -546,11 +629,11 @@ namespace
                     if (!read_buf)
                         continue;
 
-                    last_read_file_path = processed_files.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), archive_info.path_in_archive));
-                    columns_from_cache = tryGetColumnsFromSchemaCache(archive, last_read_file_path);
+                    last_read_file_path = paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), archive_info.path_in_archive));
+                    is_first = false;
 
-                    if (columns_from_cache)
-                        return nullptr;
+                    if (auto cached_columns = tryGetColumnsFromSchemaCache(archive, last_read_file_path))
+                        return {nullptr, cached_columns};
                 }
                 else
                 {
@@ -583,11 +666,17 @@ namespace
                         continue;
                     }
 
-                    last_read_file_path = processed_files.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), *filename));
-                    columns_from_cache = tryGetColumnsFromSchemaCache(archive, last_read_file_path);
+                    last_read_file_path = paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), *filename));
+                    is_first = false;
 
-                    if (columns_from_cache)
-                        return nullptr;
+                    if (auto cached_columns = tryGetColumnsFromSchemaCache(archive, last_read_file_path))
+                    {
+                        /// For union mode next() will be called again even if we found cached columns,
+                        /// so we need to remember last_read_buffer to continue iterating through files in archive.
+                        if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION)
+                            last_read_buffer = archive_reader->readFile(std::move(file_enumerator));
+                        return {nullptr, cached_columns};
+                    }
 
                     read_buf = archive_reader->readFile(std::move(file_enumerator));
                 }
@@ -595,18 +684,13 @@ namespace
                 break;
             }
 
-            is_first = false;
-            return read_buf;
-        }
-
-        std::optional<ColumnsDescription> getCachedColumns() override
-        {
-            return columns_from_cache;
+            return {std::move(read_buf), std::nullopt};
         }
 
         void setPreviousReadBuffer(std::unique_ptr<ReadBuffer> buffer) override
         {
-            last_read_buffer = std::move(buffer);
+            if (buffer) /// Ignore null buffers so that a previously remembered buffer is kept.
+                last_read_buffer = std::move(buffer);
         }
 
         void setNumRowsToLastFile(size_t num_rows) override
@@ -618,13 +702,45 @@ namespace
             StorageFile::getSchemaCache(getContext()).addNumRows(key, num_rows);
         }
 
-        std::vector<std::string> processed_files;
-    private:
+        void setSchemaToLastFile(const ColumnsDescription & columns) override /// Stores `columns` in the schema cache under last_read_file_path (UNION mode only).
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_file
+                || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION)
+                return; /// The file schema cache is disabled or the mode is not UNION: nothing to store.
 
+            /// In union mode the schema can differ between files in an archive, so the last
+            /// inferred schema is cached only under the key of the last processed file.
+            auto & schema_cache = StorageFile::getSchemaCache(getContext());
+            auto cache_key = getKeyForSchemaCache(last_read_file_path, format, format_settings, getContext());
+            schema_cache.addColumns(cache_key, columns);
+        }
+
+        void setResultingSchema(const ColumnsDescription & columns) override /// Stores `columns` in the schema cache for all collected paths (DEFAULT mode only).
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_file
+                || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT)
+                return; /// The file schema cache is disabled or the mode is not DEFAULT: nothing to store.
+
+            /// In default mode the resulting schema is cached for all paths.
+            /// The schema is also cached for the initial paths (maybe with globs),
+            /// so that next time we won't iterate through the files (that can be expensive).
+            for (const auto & archive : archive_info.paths_to_archives)
+                paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive, archive_info.path_in_archive)); /// Appends to the member vector.
+            auto & schema_cache = StorageFile::getSchemaCache(getContext());
+            auto cache_keys = getKeysForSchemaCache(paths_for_schema_cache, format, format_settings, getContext());
+            schema_cache.addManyColumns(cache_keys, columns);
+        }
+
+        String getLastFileName() const override /// Returns last_read_file_path, which next() sets to an "archive::file" string.
+        {
+            return last_read_file_path;
+        }
+
+    private:
         std::optional<ColumnsDescription> tryGetColumnsFromSchemaCache(const std::string & archive_path, const std::string & full_path)
         {
             auto context = getContext();
-            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_file)
+            if (!context->getSettingsRef().schema_inference_use_cache_for_file)
                 return std::nullopt;
 
             struct stat file_stat;
@@ -654,44 +770,13 @@ namespace
 
         std::string last_read_file_path;
 
-        std::optional<ColumnsDescription> columns_from_cache;
-
         std::unique_ptr<IArchiveReader::FileEnumerator> file_enumerator;
         std::unique_ptr<ReadBuffer> last_read_buffer;
 
         String format;
         const std::optional<FormatSettings> & format_settings;
+        std::vector<std::string> paths_for_schema_cache;
     };
-
-    std::optional<ColumnsDescription> tryGetColumnsFromCacheForArchives(
-        const StorageFile::ArchiveInfo & archive_info,
-        std::vector<std::string> & paths_for_schema_cache,
-        const String & format,
-        const std::optional<FormatSettings> & format_settings,
-        const ContextPtr & context)
-    {
-        struct stat file_stat{};
-        std::optional<ColumnsDescription> columns_from_cache;
-
-        for (const auto & archive : archive_info.paths_to_archives)
-        {
-            const auto & full_path = paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive, archive_info.path_in_archive));
-
-            auto & schema_cache = StorageFile::getSchemaCache(context);
-            auto get_last_mod_time = [&]() -> std::optional<time_t>
-            {
-                if (0 != stat(archive.c_str(), &file_stat))
-                    return std::nullopt;
-
-                return file_stat.st_mtime;
-            };
-
-            auto cache_key = getKeyForSchemaCache(full_path, format, format_settings, context);
-            columns_from_cache = schema_cache.tryGetColumns(cache_key, get_last_mod_time);
-        }
-
-        return columns_from_cache;
-    }
 }
 
 ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr context)
@@ -744,48 +829,19 @@ ColumnsDescription StorageFile::getTableStructureFromFile(
             "Cannot extract table structure from {} format file, because there are no files with provided path. "
             "You must specify table structure manually", format);
 
-    ColumnsDescription columns;
-    std::vector<std::string> archive_paths_for_schema_cache;
-    std::optional<ColumnsDescription> columns_from_cache;
-
-    if (context->getSettingsRef().schema_inference_use_cache_for_file)
+    if (archive_info)
     {
-        if (archive_info)
-            columns_from_cache = tryGetColumnsFromCacheForArchives(*archive_info, archive_paths_for_schema_cache, format, format_settings, context);
-        else
-            columns_from_cache = tryGetColumnsFromCache(paths, format, format_settings, context);
+        ReadBufferFromArchiveIterator read_buffer_iterator(*archive_info, format, format_settings, context);
+        return readSchemaFromFormat(
+            format,
+            format_settings,
+            read_buffer_iterator,
+            /*retry=*/archive_info->paths_to_archives.size() > 1 || !archive_info->isSingleFileRead(),
+            context);
     }
 
-    if (columns_from_cache)
-    {
-        columns = std::move(*columns_from_cache);
-    }
-    else
-    {
-        if (archive_info)
-        {
-            ReadBufferFromArchiveIterator read_buffer_iterator(*archive_info, format, format_settings, context);
-            columns = readSchemaFromFormat(
-                format,
-                format_settings,
-                read_buffer_iterator,
-                /*retry=*/archive_info->paths_to_archives.size() > 1 || !archive_info->isSingleFileRead(),
-                context);
-
-            for (auto & file : read_buffer_iterator.processed_files)
-                archive_paths_for_schema_cache.push_back(std::move(file));
-        }
-        else
-        {
-            ReadBufferFromFileIterator read_buffer_iterator(paths, format, compression_method, format_settings, context);
-            columns = readSchemaFromFormat(format, format_settings, read_buffer_iterator, paths.size() > 1, context);
-        }
-    }
-
-    if (context->getSettingsRef().schema_inference_use_cache_for_file)
-        addColumnsToCache(archive_info.has_value() ? archive_paths_for_schema_cache : paths, columns, format, format_settings, context);
-
-    return columns;
+    ReadBufferFromFileIterator read_buffer_iterator(paths, format, compression_method, format_settings, context);
+    return readSchemaFromFormat(format, format_settings, read_buffer_iterator, paths.size() > 1, context);
 }
 
 bool StorageFile::supportsSubsetOfColumns(const ContextPtr & context) const
@@ -1972,43 +2028,6 @@ SchemaCache & StorageFile::getSchemaCache(const ContextPtr & context)
     return schema_cache;
 }
 
-std::optional<ColumnsDescription> StorageFile::tryGetColumnsFromCache(
-    const Strings & paths, const String & format_name, const std::optional<FormatSettings> & format_settings, ContextPtr context)
-{
-    /// Check if the cache contains one of the paths.
-    auto & schema_cache = getSchemaCache(context);
-    struct stat file_stat{};
-    for (const auto & path : paths)
-    {
-        auto get_last_mod_time = [&]() -> std::optional<time_t>
-        {
-            if (0 != stat(path.c_str(), &file_stat))
-                return std::nullopt;
-
-            return file_stat.st_mtime;
-        };
-
-        auto cache_key = getKeyForSchemaCache(path, format_name, format_settings, context);
-        auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time);
-        if (columns)
-            return columns;
-    }
-
-    return std::nullopt;
-}
-
-void StorageFile::addColumnsToCache(
-    const Strings & paths,
-    const ColumnsDescription & columns,
-    const String & format_name,
-    const std::optional<FormatSettings> & format_settings,
-    const ContextPtr & context)
-{
-    auto & schema_cache = getSchemaCache(context);
-    auto cache_keys = getKeysForSchemaCache(paths, format_name, format_settings, context);
-    schema_cache.addManyColumns(cache_keys, columns);
-}
-
 void StorageFile::parseFileSource(String source, String & filename, String & path_to_archive)
 {
     size_t pos = source.find("::");
diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h
index f1464b90ab4..0bdbd642459 100644
--- a/src/Storages/StorageFile.h
+++ b/src/Storages/StorageFile.h
@@ -126,16 +126,6 @@ protected:
 private:
     void setStorageMetadata(CommonArguments args);
 
-    static std::optional<ColumnsDescription> tryGetColumnsFromCache(
-        const Strings & paths, const String & format_name, const std::optional<FormatSettings> & format_settings, ContextPtr context);
-
-    static void addColumnsToCache(
-        const Strings & paths,
-        const ColumnsDescription & columns,
-        const String & format_name,
-        const std::optional<FormatSettings> & format_settings,
-        const ContextPtr & context);
-
     std::string format_name;
     // We use format settings from global context + CREATE query for File table
     // function -- in this case, format_settings is set.
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index f26d59cb559..18abd66cec9 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1499,8 +1499,15 @@ namespace
         {
         }
 
-        std::unique_ptr<ReadBuffer> next() override
+        std::pair<std::unique_ptr<ReadBuffer>, std::optional<ColumnsDescription>> next() override
         {
+            /// For default mode check cached columns for currently read keys on first iteration.
+            if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT)
+            {
+                if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end()))
+                    return {nullptr, cached_columns};
+            }
+
             while (true)
             {
                 current_key_with_info = (*file_iterator)();
@@ -1514,36 +1521,42 @@ namespace
                             "in S3 or all files are empty. You must specify table structure manually",
                             configuration.format);
 
-                    return nullptr;
+                    return {nullptr, std::nullopt};
                 }
 
-                /// S3 file iterator could get new keys after new iteration, check them in schema cache.
-                if (getContext()->getSettingsRef().schema_inference_use_cache_for_s3 && read_keys.size() > prev_read_keys_size)
+                /// S3 file iterator could get new keys after new iteration, check them in schema cache if schema inference mode is default.
+                if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT && read_keys.size() > prev_read_keys_size)
                 {
-                    columns_from_cache = StorageS3::tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end(), configuration, format_settings, getContext());
+                    auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end());
                     prev_read_keys_size = read_keys.size();
                     if (columns_from_cache)
-                        return nullptr;
+                        return {nullptr, columns_from_cache};
                 }
 
                 if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info.info && current_key_with_info.info->size == 0)
                     continue;
 
+                /// In union mode, check cached columns only for current key.
+                if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION)
+                {
+                    StorageS3::KeysWithInfo keys = {current_key_with_info};
+                    if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end()))
+                    {
+                        first = false;
+                        return {nullptr, columns_from_cache};
+                    }
+                }
+
                 int zstd_window_log_max = static_cast<int>(getContext()->getSettingsRef().zstd_window_log_max);
                 auto impl = std::make_unique<ReadBufferFromS3>(configuration.client, configuration.url.bucket, current_key_with_info.key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings());
                 if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof())
                 {
                     first = false;
-                    return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info.key, configuration.compression_method), zstd_window_log_max);
+                    return {wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info.key, configuration.compression_method), zstd_window_log_max), std::nullopt};
                 }
             }
         }
 
-        std::optional<ColumnsDescription> getCachedColumns() override
-        {
-            return columns_from_cache;
-        }
-
         void setNumRowsToLastFile(size_t num_rows) override
         {
             if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3)
@@ -1554,12 +1567,85 @@ namespace
             StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows);
         }
 
+        void setSchemaToLastFile(const ColumnsDescription & columns) override /// Stores `columns` in the S3 schema cache for the current key (UNION mode only).
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3
+                || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION)
+                return; /// The S3 schema cache is disabled or the mode is not UNION: nothing to store.
+
+            String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info.key; /// Same source layout as the lookup in tryGetColumnsFromCache: <host><port>/<bucket>/<key>.
+            auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext());
+            StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns);
+        }
+
+        void setResultingSchema(const ColumnsDescription & columns) override /// Stores `columns` in the S3 schema cache for every key in read_keys (DEFAULT mode only).
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3
+                || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT)
+                return; /// The S3 schema cache is disabled or the mode is not DEFAULT: nothing to store.
+
+            auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket;
+            Strings sources; /// One "<host><port>/<bucket>/<key>" source per key in read_keys.
+            sources.reserve(read_keys.size());
+            std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem.key; });
+            auto cache_keys = getKeysForSchemaCache(sources, configuration.format, format_settings, getContext());
+            StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns);
+        }
+
+        String getLastFileName() const override { return current_key_with_info.key; } /// Key of the entry most recently obtained from file_iterator in next().
+
     private:
+        std::optional<ColumnsDescription> tryGetColumnsFromCache(
+            const StorageS3::KeysWithInfo::const_iterator & begin,
+            const StorageS3::KeysWithInfo::const_iterator & end)
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3)
+                return std::nullopt; /// The schema cache for S3 is disabled by the setting.
+
+            auto & schema_cache = StorageS3::getSchemaCache(getContext());
+            for (auto it = begin; it < end; ++it) /// Returns the first cache hit among the keys in [begin, end).
+            {
+                auto get_last_mod_time = [&] /// Callback handed to the cache lookup below.
+                {
+                    time_t last_modification_time = 0;
+                    if (it->info)
+                    {
+                        last_modification_time = it->info->last_modification_time;
+                    }
+                    else
+                    {
+                        /// Note that if an exception occurs in getObjectInfo, the returned info will be empty,
+                        /// but the schema cache handles this case and won't return columns from the cache,
+                        /// because we can't say that they are valid without the last modification time.
+                        last_modification_time = S3::getObjectInfo(
+                             *configuration.client,
+                             configuration.url.bucket,
+                             it->key,
+                             configuration.url.version_id,
+                             configuration.request_settings,
+                             /*with_metadata=*/ false,
+                             /*for_disk_s3=*/ false,
+                             /*throw_on_error= */ false).last_modification_time;
+                    }
+
+                    return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt; /// A zero time is reported as "unknown".
+                };
+
+                String path = fs::path(configuration.url.bucket) / it->key;
+                String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path;
+                auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext());
+                auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time);
+                if (columns)
+                    return columns;
+            }
+
+            return std::nullopt; /// None of the keys produced cached columns.
+        }
+
         std::shared_ptr<StorageS3Source::IIterator> file_iterator;
         const StorageS3Source::KeysWithInfo & read_keys;
         const StorageS3::Configuration & configuration;
         const std::optional<FormatSettings> & format_settings;
-        std::optional<ColumnsDescription> columns_from_cache;
         StorageS3Source::KeyWithInfo current_key_with_info;
         size_t prev_read_keys_size;
         bool first = true;
@@ -1576,28 +1662,10 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
 
     auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, &read_keys);
 
-    std::optional<ColumnsDescription> columns_from_cache;
-    if (ctx->getSettingsRef().schema_inference_use_cache_for_s3)
-        columns_from_cache = tryGetColumnsFromCache(read_keys.begin(), read_keys.end(), configuration, format_settings, ctx);
-
-    ColumnsDescription columns;
-    if (columns_from_cache)
-    {
-        columns = *columns_from_cache;
-    }
-    else
-    {
-        ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format_settings, ctx);
-        columns = readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx);
-    }
-
-    if (ctx->getSettingsRef().schema_inference_use_cache_for_s3)
-        addColumnsToCache(read_keys, configuration, columns, configuration.format, format_settings, ctx);
-
-    return columns;
+    ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format_settings, ctx);
+    return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx);
 }
 
-
 void registerStorageS3Impl(const String & name, StorageFactory & factory)
 {
     factory.registerStorage(name, [](const StorageFactory::Arguments & args)
@@ -1687,70 +1755,6 @@ SchemaCache & StorageS3::getSchemaCache(const ContextPtr & ctx)
     return schema_cache;
 }
 
-std::optional<ColumnsDescription> StorageS3::tryGetColumnsFromCache(
-    const KeysWithInfo::const_iterator & begin,
-    const KeysWithInfo::const_iterator & end,
-    const Configuration & configuration,
-    const std::optional<FormatSettings> & format_settings,
-    const ContextPtr & ctx)
-{
-    auto & schema_cache = getSchemaCache(ctx);
-    for (auto it = begin; it < end; ++it)
-    {
-        auto get_last_mod_time = [&]
-        {
-            time_t last_modification_time = 0;
-            if (it->info)
-            {
-                last_modification_time = it->info->last_modification_time;
-            }
-            else
-            {
-                /// Note that in case of exception in getObjectInfo returned info will be empty,
-                /// but schema cache will handle this case and won't return columns from cache
-                /// because we can't say that it's valid without last modification time.
-                last_modification_time = S3::getObjectInfo(
-                    *configuration.client,
-                    configuration.url.bucket,
-                    it->key,
-                    configuration.url.version_id,
-                    configuration.request_settings,
-                    /*with_metadata=*/ false,
-                    /*for_disk_s3=*/ false,
-                    /*throw_on_error= */ false).last_modification_time;
-            }
-
-            return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt;
-        };
-
-        String path = fs::path(configuration.url.bucket) / it->key;
-        String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path;
-        auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, ctx);
-        auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time);
-        if (columns)
-            return columns;
-    }
-
-    return std::nullopt;
-}
-
-void StorageS3::addColumnsToCache(
-    const KeysWithInfo & keys,
-    const Configuration & configuration,
-    const ColumnsDescription & columns,
-    const String & format_name,
-    const std::optional<FormatSettings> & format_settings,
-    const ContextPtr & ctx)
-{
-    auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket;
-    Strings sources;
-    sources.reserve(keys.size());
-    std::transform(keys.begin(), keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem.key; });
-    auto cache_keys = getKeysForSchemaCache(sources, format_name, format_settings, ctx);
-    auto & schema_cache = getSchemaCache(ctx);
-    schema_cache.addManyColumns(cache_keys, columns);
-}
-
 }
 
 #endif
diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index 088f9000ce8..b22f8275bf0 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -344,21 +344,6 @@ public:
 
     using KeysWithInfo = StorageS3Source::KeysWithInfo;
 
-    static std::optional<ColumnsDescription> tryGetColumnsFromCache(
-        const KeysWithInfo::const_iterator & begin,
-        const KeysWithInfo::const_iterator & end,
-        const Configuration & configuration,
-        const std::optional<FormatSettings> & format_settings,
-        const ContextPtr & ctx);
-
-    static void addColumnsToCache(
-        const KeysWithInfo & keys,
-        const Configuration & configuration,
-        const ColumnsDescription & columns,
-        const String & format_name,
-        const std::optional<FormatSettings> & format_settings,
-        const ContextPtr & ctx);
-
     bool supportsTrivialCountOptimization() const override { return true; }
 
 protected:
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 58f01312399..ffb92db9279 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -708,30 +708,53 @@ namespace
             const HTTPHeaderEntries & headers_,
             const std::optional<FormatSettings> & format_settings_,
             const ContextPtr & context_)
-            : WithContext(context_), urls_to_check(urls_to_check_), format(format_), compression_method(compression_method_), headers(headers_), format_settings(format_settings_)
+            : WithContext(context_), format(format_), compression_method(compression_method_), headers(headers_), format_settings(format_settings_)
         {
-            it = urls_to_check.cbegin();
+            url_options_to_check.reserve(urls_to_check_.size());
+            for (const auto & url : urls_to_check_)
+                url_options_to_check.push_back(getFailoverOptions(url, getContext()->getSettingsRef().glob_expansion_max_elements));
         }
 
-        std::unique_ptr<ReadBuffer> next() override
+        std::pair<std::unique_ptr<ReadBuffer>, std::optional<ColumnsDescription>> next() override
         {
+            bool is_first = (current_index == 0);
+            /// For default mode check cached columns for all urls on first iteration.
+            if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT)
+            {
+                for (const auto & options : url_options_to_check)
+                {
+                    if (auto cached_columns = tryGetColumnsFromCache(options))
+                        return {nullptr, cached_columns};
+                }
+            }
+
             std::pair<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> uri_and_buf;
             do
             {
-                if (it == urls_to_check.cend())
+                if (current_index == url_options_to_check.size())
                 {
-                    if (first)
+                    if (is_first)
                         throw Exception(
                             ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
                             "Cannot extract table structure from {} format file, because all files are empty. "
                             "You must specify table structure manually",
                             format);
-                    return nullptr;
+                    return {nullptr, std::nullopt};
                 }
 
+                if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION)
+                {
+                    if (auto cached_columns = tryGetColumnsFromCache(url_options_to_check[current_index]))
+                    {
+                        ++current_index;
+                        return {nullptr, cached_columns};
+                    }
+                }
+
+                auto first_option = url_options_to_check[current_index].cbegin();
                 uri_and_buf = StorageURLSource::getFirstAvailableURIAndReadBuffer(
-                    it,
-                    urls_to_check.cend(),
+                    first_option,
+                    url_options_to_check[current_index].cend(),
                     getContext(),
                     {},
                     Poco::Net::HTTPRequest::HTTP_GET,
@@ -742,35 +765,87 @@ namespace
                     false,
                     false);
 
-                ++it;
+                ++current_index;
             } while (getContext()->getSettingsRef().engine_url_skip_empty_files && uri_and_buf.second->eof());
 
-            first = false;
-            return wrapReadBufferWithCompressionMethod(
+            current_url_option = uri_and_buf.first.toString();
+            return {wrapReadBufferWithCompressionMethod(
                 std::move(uri_and_buf.second),
                 compression_method,
-                static_cast<int>(getContext()->getSettingsRef().zstd_window_log_max));
+                static_cast<int>(getContext()->getSettingsRef().zstd_window_log_max)), std::nullopt};
         }
 
         void setNumRowsToLastFile(size_t num_rows) override
         {
-            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3)
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_url)
                 return;
 
-            String source = *std::prev(it);
-            auto key = getKeyForSchemaCache(source, format, format_settings, getContext());
+            auto key = getKeyForSchemaCache(current_url_option, format, format_settings, getContext());
             StorageURL::getSchemaCache(getContext()).addNumRows(key, num_rows);
         }
 
+        void setSchemaToLastFile(const ColumnsDescription & columns) override
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_url
+                || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION)
+                return;
+
+            auto key = getKeyForSchemaCache(current_url_option, format, format_settings, getContext());
+            StorageURL::getSchemaCache(getContext()).addColumns(key, columns);
+        }
+
+        void setResultingSchema(const ColumnsDescription & columns) override
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_url
+                || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT)
+                return;
+
+            for (const auto & options : url_options_to_check)
+            {
+                auto keys = getKeysForSchemaCache(options, format, format_settings, getContext());
+                StorageURL::getSchemaCache(getContext()).addManyColumns(keys, columns);
+            }
+        }
+
+        String getLastFileName() const override { return current_url_option; }
+
     private:
-        const std::vector<String> & urls_to_check;
-        std::vector<String>::const_iterator it;
+        std::optional<ColumnsDescription> tryGetColumnsFromCache(const Strings & urls)
+        {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_url)
+                return std::nullopt;
+            /// Probe the schema cache for each of the given URLs in order; the first hit is returned.
+            auto & schema_cache = StorageURL::getSchemaCache(getContext());
+            for (const auto & url : urls)
+            {
+                auto get_last_mod_time = [&]() -> std::optional<time_t>
+                {
+                    auto last_mod_time = StorageURL::tryGetLastModificationTime(url, headers, credentials, getContext());
+                    /// Some URLs may not provide a Last-Modified header, in which case we cannot be sure that the
+                    /// data wasn't changed after its schema was added to the cache. Report 0 instead of "unknown"
+                    /// only when schema_inference_cache_require_modification_time_for_url is disabled.
+                    if (!last_mod_time && !getContext()->getSettingsRef().schema_inference_cache_require_modification_time_for_url)
+                        return 0;
+                    return last_mod_time;
+                };
+                /// NOTE(review): `credentials` used above is not initialized in the visible constructor code - confirm that is intended.
+                auto cache_key = getKeyForSchemaCache(url, format, format_settings, getContext());
+                auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time);
+                if (columns)
+                    return columns;
+            }
+
+            return std::nullopt;
+        }
+
+        std::vector<std::vector<String>> url_options_to_check;
+        size_t current_index = 0;
+        String current_url_option;
         const String & format;
         const CompressionMethod & compression_method;
         const HTTPHeaderEntries & headers;
         Poco::Net::HTTPBasicCredentials credentials;
         const std::optional<FormatSettings> & format_settings;
-        bool first = true;
     };
 }
 
@@ -788,39 +863,12 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData(
 
     std::vector<String> urls_to_check;
     if (urlWithGlobs(uri))
-    {
-        size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements;
-        auto uri_descriptions = parseRemoteDescription(uri, 0, uri.size(), ',', max_addresses, "url");
-        for (const auto & description : uri_descriptions)
-        {
-            auto options = parseRemoteDescription(description, 0, description.size(), '|', max_addresses, "url");
-            urls_to_check.insert(urls_to_check.end(), options.begin(), options.end());
-        }
-    }
+        urls_to_check = parseRemoteDescription(uri, 0, uri.size(), ',', context->getSettingsRef().glob_expansion_max_elements, "url");
     else
-    {
         urls_to_check = {uri};
-    }
 
-    std::optional<ColumnsDescription> columns_from_cache;
-    if (context->getSettingsRef().schema_inference_use_cache_for_url)
-        columns_from_cache = tryGetColumnsFromCache(urls_to_check, headers, credentials, format, format_settings, context);
-
-    ColumnsDescription columns;
-    if (columns_from_cache)
-    {
-        columns = *columns_from_cache;
-    }
-    else
-    {
-        ReadBufferIterator read_buffer_iterator(urls_to_check, format, compression_method, headers, format_settings, context);
-        columns = readSchemaFromFormat(format, format_settings, read_buffer_iterator, urls_to_check.size() > 1, context);
-    }
-
-    if (context->getSettingsRef().schema_inference_use_cache_for_url)
-        addColumnsToCache(urls_to_check, columns, format, format_settings, context);
-
-    return columns;
+    ReadBufferIterator read_buffer_iterator(urls_to_check, format, compression_method, headers, format_settings, context);
+    return readSchemaFromFormat(format, format_settings, read_buffer_iterator, urls_to_check.size() > 1, context);
 }
 
 bool IStorageURLBase::supportsSubsetOfColumns(const ContextPtr & context) const
@@ -1033,49 +1081,6 @@ SchemaCache & IStorageURLBase::getSchemaCache(const ContextPtr & context)
     return schema_cache;
 }
 
-std::optional<ColumnsDescription> IStorageURLBase::tryGetColumnsFromCache(
-    const Strings & urls,
-    const HTTPHeaderEntries & headers,
-    const Poco::Net::HTTPBasicCredentials & credentials,
-    const String & format_name,
-    const std::optional<FormatSettings> & format_settings,
-    const ContextPtr & context)
-{
-    auto & schema_cache = getSchemaCache(context);
-    for (const auto & url : urls)
-    {
-        auto get_last_mod_time = [&]() -> std::optional<time_t>
-        {
-            auto last_mod_time = tryGetLastModificationTime(url, headers, credentials, context);
-            /// Some URLs could not have Last-Modified header, in this case we cannot be sure that
-            /// data wasn't changed after adding it's schema to cache. Use schema from cache only if
-            /// special setting for this case is enabled.
-            if (!last_mod_time && !context->getSettingsRef().schema_inference_cache_require_modification_time_for_url)
-                return 0;
-            return last_mod_time;
-        };
-
-        auto cache_key = getKeyForSchemaCache(url, format_name, format_settings, context);
-        auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time);
-        if (columns)
-            return columns;
-    }
-
-    return std::nullopt;
-}
-
-void IStorageURLBase::addColumnsToCache(
-    const Strings & urls,
-    const ColumnsDescription & columns,
-    const String & format_name,
-    const std::optional<FormatSettings> & format_settings,
-    const ContextPtr & context)
-{
-    auto & schema_cache = getSchemaCache(context);
-    auto cache_keys = getKeysForSchemaCache(urls, format_name, format_settings, context);
-    schema_cache.addManyColumns(cache_keys, columns);
-}
-
 std::optional<time_t> IStorageURLBase::tryGetLastModificationTime(
     const String & url,
     const HTTPHeaderEntries & headers,
diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h
index 6c5c50af326..8257bd65f9c 100644
--- a/src/Storages/StorageURL.h
+++ b/src/Storages/StorageURL.h
@@ -124,21 +124,6 @@ protected:
 
 private:
     virtual Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const = 0;
-
-    static std::optional<ColumnsDescription> tryGetColumnsFromCache(
-        const Strings & urls,
-        const HTTPHeaderEntries & headers,
-        const Poco::Net::HTTPBasicCredentials & credentials,
-        const String & format_name,
-        const std::optional<FormatSettings> & format_settings,
-        const ContextPtr & context);
-
-    static void addColumnsToCache(
-        const Strings & urls,
-        const ColumnsDescription & columns,
-        const String & format_name,
-        const std::optional<FormatSettings> & format_settings,
-        const ContextPtr & context);
 };
 
 
diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp
index a19cb1442c9..8dada69ee68 100644
--- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp
+++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp
@@ -40,7 +40,8 @@ NamesAndTypesList StorageSystemSchemaInferenceCache::getNamesAndTypes()
         {"additional_format_info", std::make_shared<DataTypeString>()},
         {"registration_time", std::make_shared<DataTypeDateTime>()},
         {"schema", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
-        {"number_of_rows", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>())}
+        {"number_of_rows", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>())},
+        {"schema_inference_mode", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
     };
 }
 
@@ -64,6 +65,7 @@ static void fillDataImpl(MutableColumns & res_columns, SchemaCache & schema_cach
             res_columns[6]->insert(*schema_info.num_rows);
         else
             res_columns[6]->insertDefault();
+        res_columns[7]->insert(key.schema_inference_mode);
     }
 }
 
diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py
index 3ec0506c525..c9a9880eaa8 100644
--- a/tests/integration/test_storage_azure_blob_storage/test.py
+++ b/tests/integration/test_storage_azure_blob_storage/test.py
@@ -1015,3 +1015,60 @@ def test_filtering_by_file_or_path(cluster):
     )
 
     assert int(result) == 1
+
+
+def test_union_schema_inference_mode(cluster):
+    node = cluster.instances["node"]
+    azure_query(
+        node,
+        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference1.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') select 1 as a",
+    )
+
+    azure_query(
+        node,
+        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference2.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') select 2 as b",
+    )
+    # Start from an empty azure schema cache so the inference below is not served from earlier entries.
+    node.query("system drop schema cache for azure")
+
+    result = azure_query(
+        node,
+        "desc azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference*.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
+    )
+    assert result == "a\tNullable(Int64)\nb\tNullable(Int64)\n"
+    # The cache is expected to hold one entry per file, each with that file's own schema and the UNION mode.
+    result = node.query(
+        "select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache where source like '%test_union_schema_inference%' order by file format TSV"
+    )
+    assert (
+        result == "UNION\ttest_union_schema_inference1.jsonl\ta Nullable(Int64)\n"
+        "UNION\ttest_union_schema_inference2.jsonl\tb Nullable(Int64)\n"
+    )
+    result = azure_query(
+        node,
+        "select * from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference*.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') order by tuple(*) settings schema_inference_mode='union', describe_compact_output=1 format TSV",
+    )
+    assert result == "1\t\\N\n" "\\N\t2\n"
+    node.query("system drop schema cache for azure")
+    result = azure_query(
+        node,
+        "desc azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference2.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
+    )
+    assert result == "b\tNullable(Int64)\n"
+    # With the azure cache dropped and only file 2 described above, the glob must still give the union.
+    result = azure_query(
+        node,
+        "desc azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference*.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
+    )
+    assert result == "a\tNullable(Int64)\n" "b\tNullable(Int64)\n"
+    azure_query(
+        node,
+        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference3.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', TSV) select 'Error'",
+    )
+    # File 3 matches the *.jsonl glob but was written as TSV ('Error'); inference over the glob is expected to fail.
+    error = azure_query(
+        node,
+        "desc azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference*.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
+        expect_error="true",
+    )
+    assert "Cannot extract table structure" in error
diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py
index eacb5295079..8ed1e4b6c0e 100644
--- a/tests/integration/test_storage_hdfs/test.py
+++ b/tests/integration/test_storage_hdfs/test.py
@@ -998,6 +998,55 @@ def test_read_subcolumns(started_cluster):
     )
 
 
+def test_union_schema_inference_mode(started_cluster):
+    node = started_cluster.instances["node1"]
+
+    node.query(
+        "insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference1.jsonl') select 1 as a"
+    )
+
+    node.query(
+        "insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference2.jsonl') select 2 as b"
+    )
+
+    node.query("system drop schema cache for hdfs")
+
+    result = node.query(
+        "desc hdfs('hdfs://hdfs1:9000/test_union_schema_inference*.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV"
+    )
+    assert result == "a\tNullable(Int64)\nb\tNullable(Int64)\n"
+
+    result = node.query(
+        "select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache where source like '%test_union_schema_inference%' order by file format TSV"
+    )
+    assert (
+        result == "UNION\ttest_union_schema_inference1.jsonl\ta Nullable(Int64)\n"
+        "UNION\ttest_union_schema_inference2.jsonl\tb Nullable(Int64)\n"
+    )
+    result = node.query(
+        "select * from hdfs('hdfs://hdfs1:9000/test_union_schema_inference*.jsonl') order by tuple(*) settings schema_inference_mode='union', describe_compact_output=1 format TSV"
+    )
+    assert result == "1\t\\N\n" "\\N\t2\n"
+    node.query(f"system drop schema cache for hdfs")
+    result = node.query(
+        "desc hdfs('hdfs://hdfs1:9000/test_union_schema_inference2.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV"
+    )
+    assert result == "b\tNullable(Int64)\n"
+
+    result = node.query(
+        "desc hdfs('hdfs://hdfs1:9000/test_union_schema_inference*.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV"
+    )
+    assert result == "a\tNullable(Int64)\n" "b\tNullable(Int64)\n"
+    node.query(
+        f"insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference3.jsonl', TSV) select 'Error'"
+    )
+
+    error = node.query_and_get_error(
+        "desc hdfs('hdfs://hdfs1:9000/test_union_schema_inference*.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV"
+    )
+    assert "Cannot extract table structure" in error
+
+
 if __name__ == "__main__":
     cluster.start()
     input("Cluster created, press any key to destroy...")
diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py
index ca2ddeb78b0..2f176a95e73 100644
--- a/tests/integration/test_storage_s3/test.py
+++ b/tests/integration/test_storage_s3/test.py
@@ -2072,3 +2072,65 @@ def test_filtering_by_file_or_path(started_cluster):
     )
 
     assert int(result) == 1
+
+
+def test_union_schema_inference_mode(started_cluster):
+    bucket = started_cluster.minio_bucket
+    instance = started_cluster.instances["s3_non_default"]
+
+    instance.query(
+        f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference1.jsonl') select 1 as a"
+    )
+
+    instance.query(
+        f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference2.jsonl') select 2 as b"
+    )
+
+    instance.query(
+        f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference3.jsonl') select 2 as c"
+    )
+
+    instance.query(
+        f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference4.jsonl', TSV) select 'Error'"
+    )
+
+    for engine in ["s3", "url"]:
+        instance.query("system drop schema cache for s3")
+
+        result = instance.query(
+            f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference{{1,2,3}}.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV"
+        )
+        assert result == "a\tNullable(Int64)\nb\tNullable(Int64)\nc\tNullable(Int64)\n"
+
+        result = instance.query(
+            "select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache where source like '%test_union_schema_inference%' order by file format TSV"
+        )
+        assert (
+            result == "UNION\ttest_union_schema_inference1.jsonl\ta Nullable(Int64)\n"
+            "UNION\ttest_union_schema_inference2.jsonl\tb Nullable(Int64)\n"
+            "UNION\ttest_union_schema_inference3.jsonl\tc Nullable(Int64)\n"
+        )
+        result = instance.query(
+            f"select * from {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference{{1,2,3}}.jsonl') order by tuple(*) settings schema_inference_mode='union', describe_compact_output=1 format TSV"
+        )
+        assert result == "1\t\\N\t\\N\n" "\\N\t2\t\\N\n" "\\N\t\\N\t2\n"
+
+        instance.query(f"system drop schema cache for {engine}")
+        result = instance.query(
+            f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference2.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV"
+        )
+        assert result == "b\tNullable(Int64)\n"
+
+        result = instance.query(
+            f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference{{1,2,3}}.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV"
+        )
+        assert (
+            result == "a\tNullable(Int64)\n"
+            "b\tNullable(Int64)\n"
+            "c\tNullable(Int64)\n"
+        )
+
+        error = instance.query_and_get_error(
+            f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference{{1,2,3,4}}.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV"
+        )
+        assert "Cannot extract table structure" in error
diff --git a/tests/queries/0_stateless/02900_union_schema_inference_mode.reference b/tests/queries/0_stateless/02900_union_schema_inference_mode.reference
new file mode 100644
index 00000000000..3850ef3935b
--- /dev/null
+++ b/tests/queries/0_stateless/02900_union_schema_inference_mode.reference
@@ -0,0 +1,33 @@
+c	Nullable(String)					
+b	Nullable(Int64)					
+obj	Tuple(f2 Nullable(String), f3 Nullable(Int64), f1 Nullable(Int64))					
+a	Nullable(Int64)					
+{"c":"hello","b":null,"obj":{"f2":null,"f3":null,"f1":null},"a":null}
+{"c":null,"b":"2","obj":{"f2":"Some string","f3":"2","f1":null},"a":null}
+{"c":null,"b":null,"obj":{"f2":"2020-01-01","f3":null,"f1":"1"},"a":"1"}
+UNION	data1.jsonl	a Nullable(Int64), obj Tuple(f1 Nullable(Int64), f2 Nullable(Date))
+UNION	data2.jsonl	b Nullable(Int64), obj Tuple(f2 Nullable(String), f3 Nullable(Int64))
+UNION	data3.jsonl	c Nullable(String)
+c	Nullable(String)					
+c	Nullable(String)					
+b	Nullable(Int64)					
+obj	Tuple(f2 Nullable(String), f3 Nullable(Int64), f1 Nullable(Int64))					
+a	Nullable(Int64)					
+a	Nullable(Int64)					
+obj	Tuple(f1 Nullable(Int64), f2 Nullable(String), f3 Nullable(Int64))					
+b	Nullable(Int64)					
+c	Nullable(String)					
+{"a":"1","obj":{"f1":"1","f2":"2020-01-01","f3":null},"b":null,"c":null}
+{"a":null,"obj":{"f1":null,"f2":"Some string","f3":"2"},"b":"2","c":null}
+{"a":null,"obj":{"f1":null,"f2":null,"f3":null},"b":null,"c":"hello"}
+UNION	archive.tar::data1.jsonl	a Nullable(Int64), obj Tuple(f1 Nullable(Int64), f2 Nullable(Date))
+UNION	archive.tar::data2.jsonl	b Nullable(Int64), obj Tuple(f2 Nullable(String), f3 Nullable(Int64))
+UNION	archive.tar::data3.jsonl	c Nullable(String)
+c	Nullable(String)					
+a	Nullable(Int64)					
+obj	Tuple(f1 Nullable(Int64), f2 Nullable(String), f3 Nullable(Int64))					
+b	Nullable(Int64)					
+c	Nullable(String)					
+1
+1
+1
diff --git a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
new file mode 100755
index 00000000000..86b2e0801f9
--- /dev/null
+++ b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest, no-msan, no-ubsan
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+mkdir -p $CLICKHOUSE_TEST_UNIQUE_NAME
+rm -rf $CLICKHOUSE_TEST_UNIQUE_NAME/*
+
+echo '{"a" : 1, "obj" : {"f1" : 1, "f2" : "2020-01-01"}}' > $CLICKHOUSE_TEST_UNIQUE_NAME/data1.jsonl
+echo '{"b" : 2, "obj" : {"f3" : 2, "f2" : "Some string"}}' > $CLICKHOUSE_TEST_UNIQUE_NAME/data2.jsonl
+echo '{"c" : "hello"}' > $CLICKHOUSE_TEST_UNIQUE_NAME/data3.jsonl
+
+$CLICKHOUSE_LOCAL -nm -q "
+set schema_inference_mode = 'union';
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.jsonl');
+select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.jsonl') order by tuple(*) format JSONEachRow;
+select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache order by file;
+"
+
+$CLICKHOUSE_LOCAL -nm -q "
+set schema_inference_mode = 'union';
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data3.jsonl');
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.jsonl');
+"
+
+cd $CLICKHOUSE_TEST_UNIQUE_NAME/ && tar -cf archive.tar data1.jsonl data2.jsonl data3.jsonl && cd ..
+
+$CLICKHOUSE_LOCAL -nm -q "
+set schema_inference_mode = 'union';
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data*.jsonl');
+select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data*.jsonl') order by tuple(*) format JSONEachRow;
+select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache order by file;
+"
+
+$CLICKHOUSE_LOCAL -nm -q "
+set schema_inference_mode = 'union';
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data3.jsonl');
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data*.jsonl');
+"
+
+echo 'Error' > $CLICKHOUSE_TEST_UNIQUE_NAME/data4.jsonl
+$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.jsonl') settings schema_inference_mode='union'" 2>&1 | grep -c -F "Cannot extract table structure"
+
+$CLICKHOUSE_LOCAL -nm -q "
+set schema_inference_mode = 'union';
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{2,3}.jsonl');
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.jsonl');
+" 2>&1 | grep -c -F "Cannot extract table structure"
+
+echo 42 > $CLICKHOUSE_TEST_UNIQUE_NAME/data1.csv
+echo 42, 43 > $CLICKHOUSE_TEST_UNIQUE_NAME/data2.csv
+
+$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.csv') settings schema_inference_mode='union'" 2>&1 | grep -c -F "BAD_ARGUMENTS";
+
+rm -rf $CLICKHOUSE_TEST_UNIQUE_NAME

From 9ec43d8cb994fad19043855345303faa8a713d33 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Fri, 20 Oct 2023 21:02:33 +0000
Subject: [PATCH 002/213] Add basic docs, fix unrelated changes

---
 docs/en/operations/settings/settings.md           |  6 ++++++
 src/Functions/FunctionGenerateRandomStructure.cpp | 12 ++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index ccbf54843e4..1c8c9720121 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -4346,6 +4346,12 @@ Possible values:
 
 Default value: 2.
 
+## schema_inference_mode {#schema_inference_mode}
+
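+The mode of schema inference. Possible values: `default` (all files are assumed to have the same schema, so it can be inferred from any of them) and `union` (files may have different schemas, and the resulting schema is the union of the schemas of all files).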
+
+Default value: `default`.
+
 ## compatibility {#compatibility}
 
 The `compatibility` setting causes ClickHouse to use the default settings of a previous version of ClickHouse, where the previous version is provided as the setting.
diff --git a/src/Functions/FunctionGenerateRandomStructure.cpp b/src/Functions/FunctionGenerateRandomStructure.cpp
index 8e086f075a4..f85b2596530 100644
--- a/src/Functions/FunctionGenerateRandomStructure.cpp
+++ b/src/Functions/FunctionGenerateRandomStructure.cpp
@@ -34,7 +34,7 @@ namespace
     const size_t MAX_DECIMAL256_PRECISION = 76;
     const size_t MAX_DEPTH = 16;
 
-    constexpr std::array<TypeIndex, 28> simple_types
+    constexpr std::array<TypeIndex, 29> simple_types
     {
         TypeIndex::Int8,
         TypeIndex::UInt8,
@@ -64,7 +64,7 @@ namespace
         TypeIndex::Enum16,
         TypeIndex::IPv4,
         TypeIndex::IPv6,
-//        TypeIndex::UUID,
+        TypeIndex::UUID,
     };
 
     constexpr std::array<TypeIndex, 5> complex_types
@@ -76,7 +76,7 @@ namespace
         TypeIndex::Map,
     };
 
-    constexpr std::array<TypeIndex, 21> map_key_types
+    constexpr std::array<TypeIndex, 22> map_key_types
     {
         TypeIndex::Int8,
         TypeIndex::UInt8,
@@ -98,11 +98,11 @@ namespace
         TypeIndex::IPv4,
         TypeIndex::Enum8,
         TypeIndex::Enum16,
-//        TypeIndex::UUID,
+        TypeIndex::UUID,
         TypeIndex::LowCardinality,
     };
 
-    constexpr std::array<TypeIndex, 21> suspicious_lc_types
+    constexpr std::array<TypeIndex, 22> suspicious_lc_types
     {
         TypeIndex::Int8,
         TypeIndex::UInt8,
@@ -125,7 +125,7 @@ namespace
         TypeIndex::FixedString,
         TypeIndex::IPv4,
         TypeIndex::IPv6,
-//        TypeIndex::UUID,
+        TypeIndex::UUID,
     };
 
     template <bool allow_complex_types>

From 544b217d91c1978760a5d5fe334a3fbe447afd76 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Fri, 20 Oct 2023 21:05:26 +0000
Subject: [PATCH 003/213] Fix style

---
 src/Formats/ReadSchemaUtils.cpp                              | 1 +
 .../queries/0_stateless/02900_union_schema_inference_mode.sh | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp
index f2e831dfc46..c5630267e3f 100644
--- a/src/Formats/ReadSchemaUtils.cpp
+++ b/src/Formats/ReadSchemaUtils.cpp
@@ -14,6 +14,7 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
     extern const int ONLY_NULLS_WHILE_READING_SCHEMA;
     extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
+    extern const int TYPE_MISMATCH;
 }
 
 static std::optional<NamesAndTypesList> getOrderedColumnsList(const NamesAndTypesList & columns_list, const Names & columns_order_hint)
diff --git a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
index 86b2e0801f9..76a7d7e4cd3 100755
--- a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
+++ b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
@@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
 
 mkdir -p $CLICKHOUSE_TEST_UNIQUE_NAME
-rm -rf $CLICKHOUSE_TEST_UNIQUE_NAME/*
+rm -rf ${CLICKHOUSE_TEST_UNIQUE_NAME}/*
 
 echo '{"a" : 1, "obj" : {"f1" : 1, "f2" : "2020-01-01"}}' > $CLICKHOUSE_TEST_UNIQUE_NAME/data1.jsonl
 echo '{"b" : 2, "obj" : {"f3" : 2, "f2" : "Some string"}}' > $CLICKHOUSE_TEST_UNIQUE_NAME/data2.jsonl
@@ -54,4 +54,5 @@ echo 42, 43 > $CLICKHOUSE_TEST_UNIQUE_NAME/data2.csv
 
 $CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.csv') settings schema_inference_mode='union'" 2>&1 | grep -c -F "BAD_ARGUMENTS";
 
-rm -rf $CLICKHOUSE_TEST_UNIQUE_NAME
+rm -rf ${CLICKHOUSE_TEST_UNIQUE_NAME}
+

From b0c3fee4a98d06e1de3c8ee7e52963dbfa39c9e2 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Fri, 20 Oct 2023 21:25:01 +0000
Subject: [PATCH 004/213] Fix style

---
 tests/queries/0_stateless/02900_union_schema_inference_mode.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
index 76a7d7e4cd3..0e520e27cb4 100755
--- a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
+++ b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
@@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
 
 mkdir -p $CLICKHOUSE_TEST_UNIQUE_NAME
-rm -rf ${CLICKHOUSE_TEST_UNIQUE_NAME}/*
+rm -rf "${CLICKHOUSE_TEST_UNIQUE_NAME}/*"
 
 echo '{"a" : 1, "obj" : {"f1" : 1, "f2" : "2020-01-01"}}' > $CLICKHOUSE_TEST_UNIQUE_NAME/data1.jsonl
 echo '{"b" : 2, "obj" : {"f3" : 2, "f2" : "Some string"}}' > $CLICKHOUSE_TEST_UNIQUE_NAME/data2.jsonl

From de90e5fbb66abc61349a42e854b54a0531829a3b Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Fri, 20 Oct 2023 23:39:09 +0200
Subject: [PATCH 005/213] Try fix style

---
 tests/queries/0_stateless/02900_union_schema_inference_mode.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
index 0e520e27cb4..5e53b857056 100755
--- a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
+++ b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
@@ -6,8 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
 
 mkdir -p $CLICKHOUSE_TEST_UNIQUE_NAME
-rm -rf "${CLICKHOUSE_TEST_UNIQUE_NAME}/*"
-
 echo '{"a" : 1, "obj" : {"f1" : 1, "f2" : "2020-01-01"}}' > $CLICKHOUSE_TEST_UNIQUE_NAME/data1.jsonl
 echo '{"b" : 2, "obj" : {"f3" : 2, "f2" : "Some string"}}' > $CLICKHOUSE_TEST_UNIQUE_NAME/data2.jsonl
 echo '{"c" : "hello"}' > $CLICKHOUSE_TEST_UNIQUE_NAME/data3.jsonl

From cfa510ea0ac324fc9c279f3f4afcb621104541d9 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Mon, 23 Oct 2023 14:38:34 +0000
Subject: [PATCH 006/213] Add more documentation, fix build

---
 docs/en/interfaces/schema-inference.md  | 99 +++++++++++++++++++++++++
 docs/en/operations/settings/settings.md |  3 +
 src/Storages/StorageS3.cpp              | 12 +--
 3 files changed, 108 insertions(+), 6 deletions(-)

diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md
index 0aadb09730a..f361bd6cdb2 100644
--- a/docs/en/interfaces/schema-inference.md
+++ b/docs/en/interfaces/schema-inference.md
@@ -1846,3 +1846,102 @@ DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}') SETTINGS allow_ex
 │ json │ Object('json') │              │                    │         │                  │                │
 └──────┴────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
 ```
+
+## Schema inference modes {#schema-inference-modes}
+
+Schema inference from a set of data files can work in two different modes: `default` and `union`.
+The mode is controlled by the setting `schema_inference_mode`.
+
+### Default mode {#default-schema-inference-mode}
+
+In default mode, ClickHouse assumes that all files have the same schema and tries to infer the schema by reading files one by one until it succeeds.
+
+Example:
+
+Let's say we have 3 files `data1.jsonl`, `data2.jsonl` and `data3.jsonl` with the following content:
+
+`data1.jsonl`:
+```json
+{"field1" :  1, "field2" :  null}
+{"field1" :  2, "field2" :  null}
+{"field1" :  3, "field2" :  null}
+```
+
+`data2.jsonl`:
+```json
+{"field1" :  4, "field2" :  "Data4"}
+{"field1" :  5, "field2" :  "Data5"}
+{"field1" :  6, "field2" :  "Data5"}
+```
+
+`data3.jsonl`:
+```json
+{"field1" :  7, "field2" :  "Data7", "field3" :  [1, 2, 3]}
+{"field1" :  8, "field2" :  "Data8", "field3" :  [4, 5, 6]}
+{"field1" :  9, "field2" :  "Data9", "field3" :  [7, 8, 9]}
+```
+
+Let's try to use schema inference on these 3 files:
+```sql
+:) DESCRIBE file('data{1,2,3}.jsonl') SETTINGS schema_inference_mode='default'
+```
+
+Result:
+```text
+┌─name───┬─type─────────────┐
+│ field1 │ Nullable(Int64)  │
+│ field2 │ Nullable(String) │
+└────────┴──────────────────┘
+```
+
+As we can see, we don't have `field3` from file `data3.jsonl`.
+This happens because ClickHouse first tried to infer the schema from file `data1.jsonl` and failed because field `field2` contains only nulls,
+then tried to infer the schema from `data2.jsonl` and succeeded, so the data from file `data3.jsonl` wasn't read.
+
+### Union mode {#union-schema-inference-mode}
+
+In union mode, ClickHouse assumes that files can have different schemas, so it infers the schemas of all files and then unions them into a common schema.
+
+Let's say we have 3 files `data1.jsonl`, `data2.jsonl` and `data3.jsonl` with the following content:
+
+`data1.jsonl`:
+```json
+{"field1" :  1}
+{"field1" :  2}
+{"field1" :  3}
+```
+
+`data2.jsonl`:
+```json
+{"field2" :  "Data4"}
+{"field2" :  "Data5"}
+{"field2" :  "Data5"}
+```
+
+`data3.jsonl`:
+```json
+{"field3" :  [1, 2, 3]}
+{"field3" :  [4, 5, 6]}
+{"field3" :  [7, 8, 9]}
+```
+
+Let's try to use schema inference on these 3 files:
+```sql
+:) DESCRIBE file('data{1,2,3}.jsonl') SETTINGS schema_inference_mode='union'
+```
+
+Result:
+```text
+┌─name───┬─type───────────────────┐
+│ field1 │ Nullable(Int64)        │
+│ field2 │ Nullable(String)       │
+│ field3 │ Array(Nullable(Int64)) │
+└────────┴────────────────────────┘
+```
+
+As we can see, we have all fields from all files.
+
+Note:
+- As some of the files may not contain some columns from the resulting schema, union mode is supported only for formats that support reading subset of columns (like JSONEachRow, Parquet, TSVWithNames, etc) and won't work for other formats (like CSV, TSV, JSONCompactEachEow, etc).
+- If ClickHouse cannot infer the schema from one of the files, the exception will be thrown.
+- If you have a lot of files, reading schema from all of them can take a lot of time.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 1c8c9720121..27ac051631a 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -4305,6 +4305,8 @@ Default value: `1GiB`.
 
 ## Schema Inference settings
 
+See [schema inference](../../interfaces/schema-inference.md#schema-inference-modes) documentation for more details.
+
 ### schema_inference_use_cache_for_file {schema_inference_use_cache_for_file}
 
 Enable schemas cache for schema inference in `file` table function.
@@ -4349,6 +4351,7 @@ Default value: 2.
 ### schema_inference_mode {schema_inference_mode}
 
 The mode of schema inference. Possible values: `default` and `union`.
+See [schema inference modes](../../interfaces/schema-inference.md#schema-inference-modes) section for more details.
 
 Default value: `default`.
 
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index b6df46ed589..7430ec5e65c 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1539,7 +1539,7 @@ namespace
                 /// In union mode, check cached columns only for current key.
                 if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION)
                 {
-                    StorageS3::KeysWithInfo keys = {*current_key_with_info};
+                    StorageS3::KeysWithInfo keys = {current_key_with_info};
                     if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end()))
                     {
                         first = false;
@@ -1587,7 +1587,7 @@ namespace
             auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket;
             Strings sources;
             sources.reserve(read_keys.size());
-            std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem.key; });
+            std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem->key; });
             auto cache_keys = getKeysForSchemaCache(sources, configuration.format, format_settings, getContext());
             StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns);
         }
@@ -1608,9 +1608,9 @@ namespace
                 auto get_last_mod_time = [&]
                 {
                     time_t last_modification_time = 0;
-                    if (it->info)
+                    if ((*it)->info)
                     {
-                        last_modification_time = it->info->last_modification_time;
+                        last_modification_time = (*it)->info->last_modification_time;
                     }
                     else
                     {
@@ -1620,7 +1620,7 @@ namespace
                         last_modification_time = S3::getObjectInfo(
                              *configuration.client,
                              configuration.url.bucket,
-                             it->key,
+                             (*it)->key,
                              configuration.url.version_id,
                              configuration.request_settings,
                              /*with_metadata=*/ false,
@@ -1631,7 +1631,7 @@ namespace
                     return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt;
                 };
 
-                String path = fs::path(configuration.url.bucket) / it->key;
+                String path = fs::path(configuration.url.bucket) / (*it)->key;
                 String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path;
                 auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext());
                 auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time);

From 91632e355454569b82db0ac3576d8c5857504ee4 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Tue, 24 Oct 2023 14:30:05 +0200
Subject: [PATCH 007/213] Fix typo in docs

---
 docs/en/interfaces/schema-inference.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md
index f361bd6cdb2..b2da03c5776 100644
--- a/docs/en/interfaces/schema-inference.md
+++ b/docs/en/interfaces/schema-inference.md
@@ -1942,6 +1942,6 @@ Result:
 As we can see, we have all fields from all files.
 
 Note:
-- As some of the files may not contain some columns from the resulting schema, union mode is supported only for formats that support reading subset of columns (like JSONEachRow, Parquet, TSVWithNames, etc) and won't work for other formats (like CSV, TSV, JSONCompactEachEow, etc).
+- As some of the files may not contain some columns from the resulting schema, union mode is supported only for formats that support reading subset of columns (like JSONEachRow, Parquet, TSVWithNames, etc) and won't work for other formats (like CSV, TSV, JSONCompactEachRow, etc).
 - If ClickHouse cannot infer the schema from one of the files, the exception will be thrown.
 - If you have a lot of files, reading schema from all of them can take a lot of time.

From 64e5797fc85ed208f6542a11448a0035e3014570 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Tue, 24 Oct 2023 14:30:26 +0200
Subject: [PATCH 008/213] Update aspell-dict.txt

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 2c73c1032f9..f104c13eab7 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -860,6 +860,7 @@ TLSv
 TPCH
 TSDB
 TSVRaw
+TSVWithNames
 TSVs
 TSan
 TThe

From 844c1fb6888bcf620484717e8599bb67a49301ce Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Tue, 24 Oct 2023 19:35:03 +0200
Subject: [PATCH 009/213] Fix

---
 src/Storages/StorageS3.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 7430ec5e65c..ebfd36fd9c8 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1592,7 +1592,12 @@ namespace
             StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns);
         }
 
-        String getLastFileName() const override { return current_key_with_info->key; }
+        String getLastFileName() const override
+        {
+            if (current_key_with_info)
+                return current_key_with_info->key;
+            return "";
+        }
 
     private:
         std::optional<ColumnsDescription> tryGetColumnsFromCache(

From e630868eac58c362c0b9c25a879ed065e269b782 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Fri, 27 Oct 2023 14:11:27 +0000
Subject: [PATCH 010/213] Fix tests

---
 src/Storages/StorageAzureBlob.cpp              |  2 +-
 ...375_system_schema_inference_cache.reference |  1 +
 .../02721_parquet_field_not_found.sh           |  2 +-
 ...02900_union_schema_inference_mode.reference | 18 +++++++++---------
 .../02900_union_schema_inference_mode.sh       | 18 +++++++++---------
 5 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index b005f1a4445..afd3e3c4b4b 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -1285,7 +1285,7 @@ namespace
         void setResultingSchema(const ColumnsDescription & columns) override
         {
             if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure
-                || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION)
+                || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT)
                 return;
 
             auto host_and_bucket = configuration.connection_url + '/' + configuration.container;
diff --git a/tests/queries/0_stateless/02375_system_schema_inference_cache.reference b/tests/queries/0_stateless/02375_system_schema_inference_cache.reference
index 94bdf1b5ddb..676fb441f53 100644
--- a/tests/queries/0_stateless/02375_system_schema_inference_cache.reference
+++ b/tests/queries/0_stateless/02375_system_schema_inference_cache.reference
@@ -5,6 +5,7 @@ additional_format_info	String
 registration_time	DateTime					
 schema	Nullable(String)					
 number_of_rows	Nullable(UInt64)					
+schema_inference_mode	Nullable(String)					
 x	Nullable(Int64)					
 s	Nullable(String)					
 x	Nullable(Int64)					
diff --git a/tests/queries/0_stateless/02721_parquet_field_not_found.sh b/tests/queries/0_stateless/02721_parquet_field_not_found.sh
index 72925afec6e..8b366d0e9a2 100755
--- a/tests/queries/0_stateless/02721_parquet_field_not_found.sh
+++ b/tests/queries/0_stateless/02721_parquet_field_not_found.sh
@@ -5,5 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-$CLICKHOUSE_LOCAL -q "select 42 as x format Parquet" | $CLICKHOUSE_LOCAL --input-format Parquet --structure "x UInt32, y UInt32" -q "select * from table" 2>&1 | grep -c "THERE_IS_NO_COLUMN"
+$CLICKHOUSE_LOCAL -q "select 42 as x format Parquet" | $CLICKHOUSE_LOCAL --input-format Parquet --structure "x UInt32, y UInt32" --input_format_parquet_allow_missing_columns=0 -q "select * from table" 2>&1 | grep -c "THERE_IS_NO_COLUMN"
 
diff --git a/tests/queries/0_stateless/02900_union_schema_inference_mode.reference b/tests/queries/0_stateless/02900_union_schema_inference_mode.reference
index 3850ef3935b..864cd780ddb 100644
--- a/tests/queries/0_stateless/02900_union_schema_inference_mode.reference
+++ b/tests/queries/0_stateless/02900_union_schema_inference_mode.reference
@@ -1,18 +1,18 @@
-c	Nullable(String)					
-b	Nullable(Int64)					
-obj	Tuple(f2 Nullable(String), f3 Nullable(Int64), f1 Nullable(Int64))					
 a	Nullable(Int64)					
-{"c":"hello","b":null,"obj":{"f2":null,"f3":null,"f1":null},"a":null}
-{"c":null,"b":"2","obj":{"f2":"Some string","f3":"2","f1":null},"a":null}
-{"c":null,"b":null,"obj":{"f2":"2020-01-01","f3":null,"f1":"1"},"a":"1"}
+obj	Tuple(f1 Nullable(Int64), f2 Nullable(String), f3 Nullable(Int64))					
+b	Nullable(Int64)					
+c	Nullable(String)					
+{"a":"1","obj":{"f1":"1","f2":"2020-01-01","f3":null},"b":null,"c":null}
+{"a":null,"obj":{"f1":null,"f2":"Some string","f3":"2"},"b":"2","c":null}
+{"a":null,"obj":{"f1":null,"f2":null,"f3":null},"b":null,"c":"hello"}
 UNION	data1.jsonl	a Nullable(Int64), obj Tuple(f1 Nullable(Int64), f2 Nullable(Date))
 UNION	data2.jsonl	b Nullable(Int64), obj Tuple(f2 Nullable(String), f3 Nullable(Int64))
 UNION	data3.jsonl	c Nullable(String)
 c	Nullable(String)					
-c	Nullable(String)					
-b	Nullable(Int64)					
-obj	Tuple(f2 Nullable(String), f3 Nullable(Int64), f1 Nullable(Int64))					
 a	Nullable(Int64)					
+obj	Tuple(f1 Nullable(Int64), f2 Nullable(String), f3 Nullable(Int64))					
+b	Nullable(Int64)					
+c	Nullable(String)					
 a	Nullable(Int64)					
 obj	Tuple(f1 Nullable(Int64), f2 Nullable(String), f3 Nullable(Int64))					
 b	Nullable(Int64)					
diff --git a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
index 5e53b857056..dc0dd8ae1f4 100755
--- a/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
+++ b/tests/queries/0_stateless/02900_union_schema_inference_mode.sh
@@ -12,45 +12,45 @@ echo '{"c" : "hello"}' > $CLICKHOUSE_TEST_UNIQUE_NAME/data3.jsonl
 
 $CLICKHOUSE_LOCAL -nm -q "
 set schema_inference_mode = 'union';
-desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.jsonl');
-select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.jsonl') order by tuple(*) format JSONEachRow;
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3}.jsonl');
+select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3}.jsonl') order by tuple(*) format JSONEachRow;
 select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache order by file;
 "
 
 $CLICKHOUSE_LOCAL -nm -q "
 set schema_inference_mode = 'union';
 desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data3.jsonl');
-desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.jsonl');
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3}.jsonl');
 "
 
 cd $CLICKHOUSE_TEST_UNIQUE_NAME/ && tar -cf archive.tar data1.jsonl data2.jsonl data3.jsonl && cd ..
 
 $CLICKHOUSE_LOCAL -nm -q "
 set schema_inference_mode = 'union';
-desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data*.jsonl');
-select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data*.jsonl') order by tuple(*) format JSONEachRow;
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data{1,2,3}.jsonl');
+select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data{1,2,3}.jsonl') order by tuple(*) format JSONEachRow;
 select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache order by file;
 "
 
 $CLICKHOUSE_LOCAL -nm -q "
 set schema_inference_mode = 'union';
 desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data3.jsonl');
-desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data*.jsonl');
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/archive.tar :: data{1,2,3}.jsonl');
 "
 
 echo 'Error' > $CLICKHOUSE_TEST_UNIQUE_NAME/data4.jsonl
-$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.jsonl') settings schema_inference_mode='union'" 2>&1 | grep -c -F "Cannot extract table structure"
+$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3,4}.jsonl') settings schema_inference_mode='union'" 2>&1 | grep -c -F "Cannot extract table structure"
 
 $CLICKHOUSE_LOCAL -nm -q "
 set schema_inference_mode = 'union';
 desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{2,3}.jsonl');
-desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.jsonl');
+desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2,3,4}.jsonl');
 " 2>&1 | grep -c -F "Cannot extract table structure"
 
 echo 42 > $CLICKHOUSE_TEST_UNIQUE_NAME/data1.csv
 echo 42, 43 > $CLICKHOUSE_TEST_UNIQUE_NAME/data2.csv
 
-$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data*.csv') settings schema_inference_mode='union'" 2>&1 | grep -c -F "BAD_ARGUMENTS";
+$CLICKHOUSE_LOCAL -q "desc file('$CLICKHOUSE_TEST_UNIQUE_NAME/data{1,2}.csv') settings schema_inference_mode='union'" 2>&1 | grep -c -F "BAD_ARGUMENTS";
 
 rm -rf ${CLICKHOUSE_TEST_UNIQUE_NAME}
 

From 9eb498b48d4238158fda98884e6e5d225467fa03 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Mon, 30 Oct 2023 10:50:05 +0000
Subject: [PATCH 011/213] Fix tests for HDFS

---
 src/Storages/HDFS/StorageHDFS.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 730b694ef87..e692e900a18 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -391,6 +391,9 @@ namespace
     private:
         std::optional<ColumnsDescription> tryGetColumnsFromCache(const std::vector<StorageHDFS::PathWithInfo> & paths_with_info_)
         {
+            if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs)
+                return std::nullopt;
+
             auto & schema_cache = StorageHDFS::getSchemaCache(getContext());
             for (const auto & path_with_info : paths_with_info_)
             {

From 7689961a8f1e12bdf6050bc90dc393c59ba3f4f1 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Mon, 30 Oct 2023 19:44:02 +0000
Subject: [PATCH 012/213] Try to fix test with azure

---
 .../test_storage_azure_blob_storage/test.py   | 336 ++++++++++++------
 1 file changed, 230 insertions(+), 106 deletions(-)

diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py
index 909ff490481..a90879c3a00 100644
--- a/tests/integration/test_storage_azure_blob_storage/test.py
+++ b/tests/integration/test_storage_azure_blob_storage/test.py
@@ -29,7 +29,6 @@ def cluster():
             with_azurite=True,
         )
         cluster.start()
-
         yield cluster
     finally:
         cluster.shutdown()
@@ -50,6 +49,7 @@ def azure_query(
                 "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected",
                 "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response",
                 "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read",
+                "DB::Exception: Azure::Core::Http::TransportException: Fail to get a new connection",
                 "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response",
                 "Azure::Core::Http::TransportException, e.what() = Connection closed before getting full response or response is less than expected",
                 "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response",
@@ -69,19 +69,29 @@ def azure_query(
             continue
 
 
-def get_azure_file_content(filename):
+def get_azure_file_content(filename, port):
     container_name = "cont"
-    connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
-    blob_service_client = BlobServiceClient.from_connection_string(connection_string)
+    connection_string = (
+        f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;"
+        f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;"
+        f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;"
+    )
+    blob_service_client = BlobServiceClient.from_connection_string(
+        str(connection_string)
+    )
     container_client = blob_service_client.get_container_client(container_name)
     blob_client = container_client.get_blob_client(filename)
     download_stream = blob_client.download_blob()
     return download_stream.readall().decode("utf-8")
 
 
-def put_azure_file_content(filename, data):
+def put_azure_file_content(filename, port, data):
     container_name = "cont"
-    connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
+    connection_string = (
+        f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;"
+        f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;"
+        f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;"
+    )
     blob_service_client = BlobServiceClient.from_connection_string(connection_string)
     try:
         container_client = blob_service_client.create_container(container_name)
@@ -94,8 +104,13 @@ def put_azure_file_content(filename, data):
 
 
 @pytest.fixture(autouse=True, scope="function")
-def delete_all_files():
-    connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
+def delete_all_files(cluster):
+    port = cluster.env_variables["AZURITE_PORT"]
+    connection_string = (
+        f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;"
+        f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;"
+        f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;"
+    )
     blob_service_client = BlobServiceClient.from_connection_string(connection_string)
     containers = blob_service_client.list_containers()
     for container in containers:
@@ -115,7 +130,8 @@ def test_create_table_connection_string(cluster):
     node = cluster.instances["node"]
     azure_query(
         node,
-        "CREATE TABLE test_create_table_conn_string (key UInt64, data String) Engine = AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'cont', 'test_create_connection_string', 'CSV')",
+        f"CREATE TABLE test_create_table_conn_string (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}',"
+        f"'cont', 'test_create_connection_string', 'CSV')",
     )
 
 
@@ -123,57 +139,67 @@ def test_create_table_account_string(cluster):
     node = cluster.instances["node"]
     azure_query(
         node,
-        "CREATE TABLE test_create_table_account_url (key UInt64, data String) Engine = AzureBlobStorage('http://azurite1:10000/devstoreaccount1',  'cont', 'test_create_connection_string', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV')",
+        f"CREATE TABLE test_create_table_account_url (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
+        f"'cont', 'test_create_connection_string', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV')",
     )
 
 
 def test_simple_write_account_string(cluster):
     node = cluster.instances["node"]
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        "CREATE TABLE test_simple_write (key UInt64, data String) Engine = AzureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_simple_write.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV')",
+        f"CREATE TABLE test_simple_write (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
+        f" 'cont', 'test_simple_write.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV')",
     )
     azure_query(node, "INSERT INTO test_simple_write VALUES (1, 'a')")
-    print(get_azure_file_content("test_simple_write.csv"))
-    assert get_azure_file_content("test_simple_write.csv") == '1,"a"\n'
+    print(get_azure_file_content("test_simple_write.csv", port))
+    assert get_azure_file_content("test_simple_write.csv", port) == '1,"a"\n'
 
 
 def test_simple_write_connection_string(cluster):
     node = cluster.instances["node"]
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        "CREATE TABLE test_simple_write_connection_string (key UInt64, data String) Engine = AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1;', 'cont', 'test_simple_write_c.csv', 'CSV')",
+        f"CREATE TABLE test_simple_write_connection_string (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', "
+        f"'cont', 'test_simple_write_c.csv', 'CSV')",
     )
     azure_query(node, "INSERT INTO test_simple_write_connection_string VALUES (1, 'a')")
-    print(get_azure_file_content("test_simple_write_c.csv"))
-    assert get_azure_file_content("test_simple_write_c.csv") == '1,"a"\n'
+    print(get_azure_file_content("test_simple_write_c.csv", port))
+    assert get_azure_file_content("test_simple_write_c.csv", port) == '1,"a"\n'
 
 
 def test_simple_write_named_collection_1(cluster):
     node = cluster.instances["node"]
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        "CREATE TABLE test_simple_write_named_collection_1 (key UInt64, data String) Engine = AzureBlobStorage(azure_conf1)",
+        f"CREATE TABLE test_simple_write_named_collection_1 (key UInt64, data String) Engine = AzureBlobStorage(azure_conf1, "
+        f"connection_string = '{cluster.env_variables['AZURITE_CONNECTION_STRING']}')",
     )
     azure_query(
         node, "INSERT INTO test_simple_write_named_collection_1 VALUES (1, 'a')"
     )
-    print(get_azure_file_content("test_simple_write_named.csv"))
-    assert get_azure_file_content("test_simple_write_named.csv") == '1,"a"\n'
+    print(get_azure_file_content("test_simple_write_named.csv", port))
+    assert get_azure_file_content("test_simple_write_named.csv", port) == '1,"a"\n'
     azure_query(node, "TRUNCATE TABLE test_simple_write_named_collection_1")
 
 
 def test_simple_write_named_collection_2(cluster):
     node = cluster.instances["node"]
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        "CREATE TABLE test_simple_write_named_collection_2 (key UInt64, data String) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='test_simple_write_named_2.csv', format='CSV')",
+        f"CREATE TABLE test_simple_write_named_collection_2 (key UInt64, data String) Engine = AzureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', "
+        f"container='cont', blob_path='test_simple_write_named_2.csv', format='CSV')",
     )
     azure_query(
         node, "INSERT INTO test_simple_write_named_collection_2 VALUES (1, 'a')"
     )
-    print(get_azure_file_content("test_simple_write_named_2.csv"))
-    assert get_azure_file_content("test_simple_write_named_2.csv") == '1,"a"\n'
+    print(get_azure_file_content("test_simple_write_named_2.csv", port))
+    assert get_azure_file_content("test_simple_write_named_2.csv", port) == '1,"a"\n'
 
 
 def test_partition_by(cluster):
@@ -182,16 +208,19 @@ def test_partition_by(cluster):
     partition_by = "column3"
     values = "(1, 2, 3), (3, 2, 1), (78, 43, 45)"
     filename = "test_{_partition_id}.csv"
+    port = cluster.env_variables["AZURITE_PORT"]
 
     azure_query(
         node,
-        f"CREATE TABLE test_partitioned_write ({table_format}) Engine = AzureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV') PARTITION BY {partition_by}",
+        f"CREATE TABLE test_partitioned_write ({table_format}) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
+        f" 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV') "
+        f"PARTITION BY {partition_by}",
     )
     azure_query(node, f"INSERT INTO test_partitioned_write VALUES {values}")
 
-    assert "1,2,3\n" == get_azure_file_content("test_3.csv")
-    assert "3,2,1\n" == get_azure_file_content("test_1.csv")
-    assert "78,43,45\n" == get_azure_file_content("test_45.csv")
+    assert "1,2,3\n" == get_azure_file_content("test_3.csv", port)
+    assert "3,2,1\n" == get_azure_file_content("test_1.csv", port)
+    assert "78,43,45\n" == get_azure_file_content("test_45.csv", port)
 
 
 def test_partition_by_string_column(cluster):
@@ -200,15 +229,18 @@ def test_partition_by_string_column(cluster):
     partition_by = "col_str"
     values = "(1, 'foo/bar'), (3, 'йцук'), (78, '你好')"
     filename = "test_{_partition_id}.csv"
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        f"CREATE TABLE test_partitioned_string_write ({table_format}) Engine = AzureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV') PARTITION BY {partition_by}",
+        f"CREATE TABLE test_partitioned_string_write ({table_format}) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
+        f" 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV') "
+        f"PARTITION BY {partition_by}",
     )
     azure_query(node, f"INSERT INTO test_partitioned_string_write VALUES {values}")
 
-    assert '1,"foo/bar"\n' == get_azure_file_content("test_foo/bar.csv")
-    assert '3,"йцук"\n' == get_azure_file_content("test_йцук.csv")
-    assert '78,"你好"\n' == get_azure_file_content("test_你好.csv")
+    assert '1,"foo/bar"\n' == get_azure_file_content("test_foo/bar.csv", port)
+    assert '3,"йцук"\n' == get_azure_file_content("test_йцук.csv", port)
+    assert '78,"你好"\n' == get_azure_file_content("test_你好.csv", port)
 
 
 def test_partition_by_const_column(cluster):
@@ -218,46 +250,54 @@ def test_partition_by_const_column(cluster):
     partition_by = "'88'"
     values_csv = "1,2,3\n3,2,1\n78,43,45\n"
     filename = "test_{_partition_id}.csv"
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        f"CREATE TABLE test_partitioned_const_write ({table_format}) Engine = AzureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV') PARTITION BY {partition_by}",
+        f"CREATE TABLE test_partitioned_const_write ({table_format}) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
+        f" 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV')"
+        f" PARTITION BY {partition_by}",
     )
     azure_query(node, f"INSERT INTO test_partitioned_const_write VALUES {values}")
-    assert values_csv == get_azure_file_content("test_88.csv")
+    assert values_csv == get_azure_file_content("test_88.csv", port)
 
 
 def test_truncate(cluster):
     node = cluster.instances["node"]
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        "CREATE TABLE test_truncate (key UInt64, data String) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='test_truncate.csv', format='CSV')",
+        f"CREATE TABLE test_truncate (key UInt64, data String) Engine = AzureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', blob_path='test_truncate.csv', format='CSV')",
     )
     azure_query(node, "INSERT INTO test_truncate VALUES (1, 'a')")
-    assert get_azure_file_content("test_truncate.csv") == '1,"a"\n'
+    assert get_azure_file_content("test_truncate.csv", port) == '1,"a"\n'
     azure_query(node, "TRUNCATE TABLE test_truncate")
     with pytest.raises(Exception):
-        print(get_azure_file_content("test_truncate.csv"))
+        print(get_azure_file_content("test_truncate.csv", port))
 
 
 def test_simple_read_write(cluster):
     node = cluster.instances["node"]
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        "CREATE TABLE test_simple_read_write (key UInt64, data String) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='test_simple_read_write.csv', format='CSV')",
+        f"CREATE TABLE test_simple_read_write (key UInt64, data String) Engine = AzureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', blob_path='test_simple_read_write.csv', "
+        f"format='CSV')",
     )
 
     azure_query(node, "INSERT INTO test_simple_read_write VALUES (1, 'a')")
-    assert get_azure_file_content("test_simple_read_write.csv") == '1,"a"\n'
+    assert get_azure_file_content("test_simple_read_write.csv", port) == '1,"a"\n'
     print(azure_query(node, "SELECT * FROM test_simple_read_write"))
     assert azure_query(node, "SELECT * FROM test_simple_read_write") == "1\ta\n"
 
 
 def test_create_new_files_on_insert(cluster):
     node = cluster.instances["node"]
-
     azure_query(
         node,
-        f"create table test_multiple_inserts(a Int32, b String) ENGINE = AzureBlobStorage(azure_conf2, container='cont', blob_path='test_parquet', format='Parquet')",
+        f"create table test_multiple_inserts(a Int32, b String) ENGINE = AzureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', blob_path='test_parquet', format='Parquet')",
     )
     azure_query(node, "truncate table test_multiple_inserts")
     azure_query(
@@ -281,10 +321,10 @@ def test_create_new_files_on_insert(cluster):
 
 def test_overwrite(cluster):
     node = cluster.instances["node"]
-
     azure_query(
         node,
-        f"create table test_overwrite(a Int32, b String) ENGINE = AzureBlobStorage(azure_conf2, container='cont', blob_path='test_parquet_overwrite', format='Parquet')",
+        f"create table test_overwrite(a Int32, b String) ENGINE = AzureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', blob_path='test_parquet_overwrite', format='Parquet')",
     )
     azure_query(node, "truncate table test_overwrite")
 
@@ -308,7 +348,8 @@ def test_insert_with_path_with_globs(cluster):
     node = cluster.instances["node"]
     azure_query(
         node,
-        f"create table test_insert_globs(a Int32, b String) ENGINE = AzureBlobStorage(azure_conf2, container='cont', blob_path='test_insert_with_globs*', format='Parquet')",
+        f"create table test_insert_globs(a Int32, b String) ENGINE = AzureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',  container='cont', blob_path='test_insert_with_globs*', format='Parquet')",
     )
     node.query_and_get_error(
         f"insert into table function test_insert_globs SELECT number, randomString(100) FROM numbers(500)"
@@ -331,7 +372,8 @@ def test_put_get_with_globs(cluster):
 
             azure_query(
                 node,
-                f"CREATE TABLE test_put_{i}_{j} ({table_format}) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='{path}', format='CSV')",
+                f"CREATE TABLE test_put_{i}_{j} ({table_format}) Engine = AzureBlobStorage(azure_conf2, "
+                f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', blob_path='{path}', format='CSV')",
             )
 
             query = f"insert into test_put_{i}_{j} VALUES {values}"
@@ -339,7 +381,8 @@ def test_put_get_with_globs(cluster):
 
     azure_query(
         node,
-        f"CREATE TABLE test_glob_select ({table_format}) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='{unique_prefix}/*_{{a,b,c,d}}/?.csv', format='CSV')",
+        f"CREATE TABLE test_glob_select ({table_format}) Engine = AzureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', blob_path='{unique_prefix}/*_{{a,b,c,d}}/?.csv', format='CSV')",
     )
     query = "select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from test_glob_select"
     assert azure_query(node, query).splitlines() == [
@@ -363,7 +406,8 @@ def test_azure_glob_scheherazade(cluster):
                 unique_num = random.randint(1, 10000)
                 azure_query(
                     node,
-                    f"CREATE TABLE test_scheherazade_{i}_{unique_num} ({table_format}) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='{path}', format='CSV')",
+                    f"CREATE TABLE test_scheherazade_{i}_{unique_num} ({table_format}) Engine = AzureBlobStorage(azure_conf2, "
+                    f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', blob_path='{path}', format='CSV')",
                 )
                 query = (
                     f"insert into test_scheherazade_{i}_{unique_num} VALUES {values}"
@@ -382,7 +426,8 @@ def test_azure_glob_scheherazade(cluster):
 
     azure_query(
         node,
-        f"CREATE TABLE test_glob_select_scheherazade ({table_format}) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='night_*/tale.csv', format='CSV')",
+        f"CREATE TABLE test_glob_select_scheherazade ({table_format}) Engine = AzureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', blob_path='night_*/tale.csv', format='CSV')",
     )
     query = "select count(), sum(column1), sum(column2), sum(column3) from test_glob_select_scheherazade"
     assert azure_query(node, query).splitlines() == ["1001\t1001\t1001\t1001"]
@@ -394,6 +439,7 @@ def test_azure_glob_scheherazade(cluster):
 )
 def test_storage_azure_get_gzip(cluster, extension, method):
     node = cluster.instances["node"]
+    port = cluster.env_variables["AZURITE_PORT"]
     filename = f"test_get_gzip.{extension}"
     name = f"test_get_gzip_{extension}"
     data = [
@@ -420,14 +466,13 @@ def test_storage_azure_get_gzip(cluster, extension, method):
     compressed = gzip.GzipFile(fileobj=buf, mode="wb")
     compressed.write(("\n".join(data)).encode())
     compressed.close()
-    put_azure_file_content(filename, buf.getvalue())
+    put_azure_file_content(filename, port, buf.getvalue())
 
     azure_query(
         node,
-        f"""CREATE TABLE {name} (name String, id UInt32) ENGINE = AzureBlobStorage(
-                                azure_conf2, container='cont', blob_path ='{filename}',
-                                format='CSV',
-                                compression='{method}')""",
+        f"CREATE TABLE {name} (name String, id UInt32) ENGINE = AzureBlobStorage( azure_conf2,"
+        f" storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', blob_path ='{filename}',"
+        f"format='CSV', compression='{method}')",
     )
 
     assert azure_query(node, f"SELECT sum(id) FROM {name}").splitlines() == ["565"]
@@ -439,7 +484,9 @@ def test_schema_inference_no_globs(cluster):
     table_format = "column1 UInt32, column2 String, column3 UInt32"
     azure_query(
         node,
-        f"CREATE TABLE test_schema_inference_src ({table_format}) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='test_schema_inference_no_globs.csv', format='CSVWithNames')",
+        f"CREATE TABLE test_schema_inference_src ({table_format}) Engine = AzureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
+        f"blob_path='test_schema_inference_no_globs.csv', format='CSVWithNames')",
     )
 
     query = f"insert into test_schema_inference_src SELECT number, toString(number), number * number FROM numbers(1000)"
@@ -447,7 +494,8 @@ def test_schema_inference_no_globs(cluster):
 
     azure_query(
         node,
-        f"CREATE TABLE test_select_inference Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='test_schema_inference_no_globs.csv')",
+        f"CREATE TABLE test_select_inference Engine = AzureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', blob_path='test_schema_inference_no_globs.csv')",
     )
 
     print(node.query("SHOW CREATE TABLE test_select_inference"))
@@ -474,7 +522,9 @@ def test_schema_inference_from_globs(cluster):
 
             azure_query(
                 node,
-                f"CREATE TABLE test_schema_{i}_{j} ({table_format}) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='{path}', format='CSVWithNames')",
+                f"CREATE TABLE test_schema_{i}_{j} ({table_format}) Engine = AzureBlobStorage(azure_conf2, "
+                f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
+                f"blob_path='{path}', format='CSVWithNames')",
             )
 
             query = f"insert into test_schema_{i}_{j} VALUES {values}"
@@ -482,7 +532,8 @@ def test_schema_inference_from_globs(cluster):
 
     azure_query(
         node,
-        f"CREATE TABLE test_glob_select_inference Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='{unique_prefix}/*_{{a,b,c,d}}/?.csv')",
+        f"CREATE TABLE test_glob_select_inference Engine = AzureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', blob_path='{unique_prefix}/*_{{a,b,c,d}}/?.csv')",
     )
 
     print(node.query("SHOW CREATE TABLE test_glob_select_inference"))
@@ -497,36 +548,47 @@ def test_schema_inference_from_globs(cluster):
 
 def test_simple_write_account_string_table_function(cluster):
     node = cluster.instances["node"]
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_simple_write_tf.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', 'key UInt64, data String') VALUES (1, 'a')",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', "
+        f"'cont', 'test_simple_write_tf.csv', 'devstoreaccount1', "
+        f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', 'key UInt64, data String')"
+        f" VALUES (1, 'a')",
     )
-    print(get_azure_file_content("test_simple_write_tf.csv"))
-    assert get_azure_file_content("test_simple_write_tf.csv") == '1,"a"\n'
+    print(get_azure_file_content("test_simple_write_tf.csv", port))
+    assert get_azure_file_content("test_simple_write_tf.csv", port) == '1,"a"\n'
 
 
 def test_simple_write_connection_string_table_function(cluster):
     node = cluster.instances["node"]
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1;', 'cont', 'test_simple_write_connection_tf.csv', 'CSV', 'auto', 'key UInt64, data String') VALUES (1, 'a')",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', "
+        f"'cont', 'test_simple_write_connection_tf.csv', 'CSV', 'auto', 'key UInt64, data String') VALUES (1, 'a')",
+    )
+    print(get_azure_file_content("test_simple_write_connection_tf.csv", port))
+    assert (
+        get_azure_file_content("test_simple_write_connection_tf.csv", port) == '1,"a"\n'
     )
-    print(get_azure_file_content("test_simple_write_connection_tf.csv"))
-    assert get_azure_file_content("test_simple_write_connection_tf.csv") == '1,"a"\n'
 
 
 def test_simple_write_named_collection_1_table_function(cluster):
     node = cluster.instances["node"]
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf1) VALUES (1, 'a')",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf1, "
+        f"connection_string = '{cluster.env_variables['AZURITE_CONNECTION_STRING']}') VALUES (1, 'a')",
     )
-    print(get_azure_file_content("test_simple_write_named.csv"))
-    assert get_azure_file_content("test_simple_write_named.csv") == '1,"a"\n'
+    print(get_azure_file_content("test_simple_write_named.csv", port))
+    assert get_azure_file_content("test_simple_write_named.csv", port) == '1,"a"\n'
 
     azure_query(
         node,
-        "CREATE TABLE drop_table (key UInt64, data String) Engine = AzureBlobStorage(azure_conf1)",
+        f"CREATE TABLE drop_table (key UInt64, data String) Engine = AzureBlobStorage(azure_conf1, "
+        f"connection_string = '{cluster.env_variables['AZURITE_CONNECTION_STRING']};')",
     )
 
     azure_query(
@@ -537,13 +599,14 @@ def test_simple_write_named_collection_1_table_function(cluster):
 
 def test_simple_write_named_collection_2_table_function(cluster):
     node = cluster.instances["node"]
-
+    port = cluster.env_variables["AZURITE_PORT"]
     azure_query(
         node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, container='cont', blob_path='test_simple_write_named_2_tf.csv', format='CSV', structure='key UInt64, data String') VALUES (1, 'a')",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
+        f" container='cont', blob_path='test_simple_write_named_2_tf.csv', format='CSV', structure='key UInt64, data String') VALUES (1, 'a')",
     )
-    print(get_azure_file_content("test_simple_write_named_2_tf.csv"))
-    assert get_azure_file_content("test_simple_write_named_2_tf.csv") == '1,"a"\n'
+    print(get_azure_file_content("test_simple_write_named_2_tf.csv", port))
+    assert get_azure_file_content("test_simple_write_named_2_tf.csv", port) == '1,"a"\n'
 
 
 def test_put_get_with_globs_tf(cluster):
@@ -562,9 +625,14 @@ def test_put_get_with_globs_tf(cluster):
 
             azure_query(
                 node,
-                f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}",
+                f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
+                f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}",
             )
-    query = f"select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, container='cont', blob_path='{unique_prefix}/*_{{a,b,c,d}}/?.csv', format='CSV', structure='{table_format}')"
+    query = (
+        f"select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
+        f"blob_path='{unique_prefix}/*_{{a,b,c,d}}/?.csv', format='CSV', structure='{table_format}')"
+    )
     assert azure_query(node, query).splitlines() == [
         "450\t450\t900\t0.csv\t{bucket}/{max_path}".format(
             bucket="cont", max_path=max_path
@@ -576,10 +644,18 @@ def test_schema_inference_no_globs_tf(cluster):
     node = cluster.instances["node"]  # type: ClickHouseInstance
     table_format = "column1 UInt32, column2 String, column3 UInt32"
 
-    query = f"insert into table function azureBlobStorage(azure_conf2, container='cont', blob_path='test_schema_inference_no_globs_tf.csv', format='CSVWithNames', structure='{table_format}') SELECT number, toString(number), number * number FROM numbers(1000)"
+    query = (
+        f"insert into table function azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', "
+        f"container='cont', blob_path='test_schema_inference_no_globs_tf.csv', format='CSVWithNames', structure='{table_format}') "
+        f"SELECT number, toString(number), number * number FROM numbers(1000)"
+    )
     azure_query(node, query)
 
-    query = "select sum(column1), sum(length(column2)), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, container='cont', blob_path='test_schema_inference_no_globs_tf.csv')"
+    query = (
+        f"select sum(column1), sum(length(column2)), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
+        f"blob_path='test_schema_inference_no_globs_tf.csv')"
+    )
     assert azure_query(node, query).splitlines() == [
         "499500\t2890\t332833500\ttest_schema_inference_no_globs_tf.csv\tcont/test_schema_inference_no_globs_tf.csv"
     ]
@@ -600,10 +676,17 @@ def test_schema_inference_from_globs_tf(cluster):
             max_path = max(path, max_path)
             values = f"({i},{j},{i + j})"
 
-            query = f"insert into table function azureBlobStorage(azure_conf2, container='cont', blob_path='{path}', format='CSVWithNames', structure='{table_format}') VALUES {values}"
+            query = (
+                f"insert into table function azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', "
+                f"container='cont', blob_path='{path}', format='CSVWithNames', structure='{table_format}') VALUES {values}"
+            )
             azure_query(node, query)
 
-    query = f"select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, container='cont', blob_path='{unique_prefix}/*_{{a,b,c,d}}/?.csv')"
+    query = (
+        f"select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, "
+        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
+        f"blob_path='{unique_prefix}/*_{{a,b,c,d}}/?.csv')"
+    )
     assert azure_query(node, query).splitlines() == [
         "450\t450\t900\t0.csv\t{bucket}/{max_path}".format(
             bucket="cont", max_path=max_path
@@ -617,15 +700,18 @@ def test_partition_by_tf(cluster):
     partition_by = "column3"
     values = "(1, 2, 3), (3, 2, 1), (78, 43, 45)"
     filename = "test_partition_tf_{_partition_id}.csv"
+    port = cluster.env_variables["AZURITE_PORT"]
 
     azure_query(
         node,
-        f"INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', "
+        f"'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', "
+        f"'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}",
     )
 
-    assert "1,2,3\n" == get_azure_file_content("test_partition_tf_3.csv")
-    assert "3,2,1\n" == get_azure_file_content("test_partition_tf_1.csv")
-    assert "78,43,45\n" == get_azure_file_content("test_partition_tf_45.csv")
+    assert "1,2,3\n" == get_azure_file_content("test_partition_tf_3.csv", port)
+    assert "3,2,1\n" == get_azure_file_content("test_partition_tf_1.csv", port)
+    assert "78,43,45\n" == get_azure_file_content("test_partition_tf_45.csv", port)
 
 
 def test_filter_using_file(cluster):
@@ -637,45 +723,64 @@ def test_filter_using_file(cluster):
 
     azure_query(
         node,
-        f"INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont', '{filename}', "
+        f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', "
+        f"'{table_format}') PARTITION BY {partition_by} VALUES {values}",
     )
 
-    query = f"select count(*) from azureBlobStorage('http://azurite1:10000/devstoreaccount1',  'cont', 'test_partition_tf_*.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') WHERE _file='test_partition_tf_3.csv'"
+    query = (
+        f"select count(*) from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',  'cont', 'test_partition_tf_*.csv', "
+        f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', "
+        f"'{table_format}') WHERE _file='test_partition_tf_3.csv'"
+    )
     assert azure_query(node, query) == "1\n"
 
 
 def test_read_subcolumns(cluster):
     node = cluster.instances["node"]
+    storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
     azure_query(
         node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.tsv', "
+        f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto',"
+        f" 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)",
     )
 
     azure_query(
         node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.jsonl', "
+        f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', "
+        f"'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)",
     )
 
     res = node.query(
-        f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
+        f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.tsv',"
+        f" 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto',"
+        f" 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
     )
 
     assert res == "2\tcont/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n"
 
     res = node.query(
-        f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
+        f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.jsonl',"
+        f" 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', "
+        f"'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
     )
 
     assert res == "2\tcont/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n"
 
     res = node.query(
-        f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
+        f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.jsonl',"
+        f" 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', "
+        f"'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
     )
 
     assert res == "0\tcont/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n"
 
     res = node.query(
-        f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')"
+        f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.jsonl',"
+        f" 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', "
+        f"'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')"
     )
 
     assert res == "42\tcont/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n"
@@ -683,15 +788,18 @@ def test_read_subcolumns(cluster):
 
 def test_read_from_not_existing_container(cluster):
     node = cluster.instances["node"]
-    query = f"select * from azureBlobStorage('http://azurite1:10000/devstoreaccount1',  'cont_not_exists', 'test_table.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')"
+    query = (
+        f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',  'cont_not_exists', 'test_table.csv', "
+        f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')"
+    )
     expected_err_msg = "container does not exist"
     assert expected_err_msg in azure_query(node, query, expect_error="true")
 
 
 def test_function_signatures(cluster):
     node = cluster.instances["node"]
-    connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1;"
-    storage_account_url = "http://azurite1:10000/devstoreaccount1"
+    connection_string = cluster.env_variables["AZURITE_CONNECTION_STRING"]
+    storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
     account_name = "devstoreaccount1"
     account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
     azure_query(
@@ -745,7 +853,8 @@ def check_profile_event_for_query(instance, file, profile_event, amount):
     query_pattern = f"azureBlobStorage%{file}".replace("'", "\\'")
     res = int(
         instance.query(
-            f"select ProfileEvents['{profile_event}'] from system.query_log where query like '%{query_pattern}%' and query not like '%ProfileEvents%' and type = 'QueryFinish' order by query_start_time_microseconds desc limit 1"
+            f"select ProfileEvents['{profile_event}'] from system.query_log where query like '%{query_pattern}%' and query not like '%ProfileEvents%' "
+            f"and type = 'QueryFinish' order by query_start_time_microseconds desc limit 1"
         )
     )
 
@@ -804,15 +913,16 @@ def check_cache(instance, expected_files):
 
 def test_schema_inference_cache(cluster):
     node = cluster.instances["node"]
-    connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1;"
-    storage_account_url = "http://azurite1:10000/devstoreaccount1"
+    connection_string = cluster.env_variables["AZURITE_CONNECTION_STRING"]
+    storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
     account_name = "devstoreaccount1"
     account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
 
     node.query("system drop schema cache")
     azure_query(
         node,
-        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.jsonl', '{account_name}', '{account_key}') select * from numbers(100)",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.jsonl', '{account_name}', '{account_key}') "
+        f"select * from numbers(100)",
     )
 
     time.sleep(1)
@@ -826,7 +936,8 @@ def test_schema_inference_cache(cluster):
 
     azure_query(
         node,
-        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.jsonl', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.jsonl', '{account_name}', '{account_key}') "
+        f"select * from numbers(100) settings azure_truncate_on_insert=1",
     )
 
     time.sleep(1)
@@ -836,7 +947,8 @@ def test_schema_inference_cache(cluster):
 
     azure_query(
         node,
-        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache1.jsonl', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache1.jsonl', '{account_name}', '{account_key}') "
+        f"select * from numbers(100) settings azure_truncate_on_insert=1",
     )
     time.sleep(1)
 
@@ -849,7 +961,8 @@ def test_schema_inference_cache(cluster):
 
     azure_query(
         node,
-        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache2.jsonl', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache2.jsonl', '{account_name}', '{account_key}') "
+        f"select * from numbers(100) settings azure_truncate_on_insert=1",
     )
     time.sleep(1)
 
@@ -895,7 +1008,8 @@ def test_schema_inference_cache(cluster):
 
     azure_query(
         node,
-        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache3.jsonl', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache3.jsonl', '{account_name}', '{account_key}') "
+        f"select * from numbers(100) settings azure_truncate_on_insert=1",
     )
     time.sleep(1)
 
@@ -919,7 +1033,8 @@ def test_schema_inference_cache(cluster):
 
     azure_query(
         node,
-        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.csv', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.csv', '{account_name}', '{account_key}') "
+        f"select * from numbers(100) settings azure_truncate_on_insert=1",
     )
     time.sleep(1)
 
@@ -943,7 +1058,8 @@ def test_schema_inference_cache(cluster):
 
     azure_query(
         node,
-        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.csv', '{account_name}', '{account_key}') select * from numbers(200) settings azure_truncate_on_insert=1",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.csv', '{account_name}', '{account_key}') "
+        f"select * from numbers(200) settings azure_truncate_on_insert=1",
     )
     time.sleep(1)
 
@@ -958,7 +1074,8 @@ def test_schema_inference_cache(cluster):
 
     azure_query(
         node,
-        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache1.csv', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache1.csv', '{account_name}', '{account_key}') "
+        f"select * from numbers(100) settings azure_truncate_on_insert=1",
     )
     time.sleep(1)
 
@@ -991,7 +1108,8 @@ def test_schema_inference_cache(cluster):
 
     azure_query(
         node,
-        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache.parquet', '{account_name}', '{account_key}') select * from numbers(100) settings azure_truncate_on_insert=1",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache.parquet', '{account_name}', '{account_key}') "
+        f"select * from numbers(100) settings azure_truncate_on_insert=1",
     )
     time.sleep(1)
 
@@ -1007,23 +1125,29 @@ def test_schema_inference_cache(cluster):
 
 def test_filtering_by_file_or_path(cluster):
     node = cluster.instances["node"]
+    storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
     azure_query(
         node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_filter1.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_filter1.tsv', 'devstoreaccount1',  "
+        f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1",
     )
 
     azure_query(
         node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_filter2.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 2",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_filter2.tsv', 'devstoreaccount1',  "
+        f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 2",
     )
 
     azure_query(
         node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_filter3.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 3",
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_filter3.tsv', 'devstoreaccount1', "
+        f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 3",
     )
 
     node.query(
-        f"select count() from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_filter*.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') where _file = 'test_filter1.tsv'"
+        f"select count() from azureBlobStorage('{storage_account_url}', 'cont', 'test_filter*.tsv', 'devstoreaccount1', "
+        f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') "
+        f"where _file = 'test_filter1.tsv'"
     )
 
     node.query("SYSTEM FLUSH LOGS")

From 940449dd59ea79229d9322e922372bc204aa5d28 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 2 Nov 2023 15:09:53 +0000
Subject: [PATCH 013/213] Try to fix azure test

---
 .../test_storage_azure_blob_storage/test.py   | 118 +++++++++---------
 1 file changed, 60 insertions(+), 58 deletions(-)

diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py
index a90879c3a00..3b4a5bf571b 100644
--- a/tests/integration/test_storage_azure_blob_storage/test.py
+++ b/tests/integration/test_storage_azure_blob_storage/test.py
@@ -49,7 +49,6 @@ def azure_query(
                 "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected",
                 "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response",
                 "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read",
-                "DB::Exception: Azure::Core::Http::TransportException: Fail to get a new connection"
                 "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response",
                 "Azure::Core::Http::TransportException, e.what() = Connection closed before getting full response or response is less than expected",
                 "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response",
@@ -911,6 +910,66 @@ def check_cache(instance, expected_files):
     )
 
 
+def test_union_schema_inference_mode(cluster):
+    node = cluster.instances["node"]
+    storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
+    account_name = "devstoreaccount1"
+    account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
+    azure_query(
+        node,
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference1.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'a UInt32') VALUES (1)",
+    )
+
+    azure_query(
+        node,
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference2.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'b UInt32') VALUES (2)",
+    )
+
+    node.query("system drop schema cache for azure")
+
+    result = azure_query(
+        node,
+        f"desc azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference*.jsonl', '{account_name}', '{account_key}', 'auto', 'auto', 'auto') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
+    )
+    assert result == "a\tNullable(Int64)\nb\tNullable(Int64)\n"
+
+    result = node.query(
+        "select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache where source like '%test_union_schema_inference%' order by file format TSV"
+    )
+    assert (
+        result == "UNION\ttest_union_schema_inference1.jsonl\ta Nullable(Int64)\n"
+        "UNION\ttest_union_schema_inference2.jsonl\tb Nullable(Int64)\n"
+    )
+    result = azure_query(
+        node,
+        f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference*.jsonl', '{account_name}', '{account_key}', 'auto', 'auto', 'auto') settings schema_inference_mode='union' format TSV",
+    )
+    assert result == "1\t\\N\n" "\\N\t2\n"
+    node.query(f"system drop schema cache for hdfs")
+    result = azure_query(
+        node,
+        f"desc azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference2.jsonl', '{account_name}', '{account_key}', 'auto', 'auto', 'auto') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
+    )
+    assert result == "b\tNullable(Int64)\n"
+
+    result = azure_query(
+        node,
+        f"desc azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference*.jsonl', '{account_name}', '{account_key}', 'auto', 'auto', 'auto') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
+    )
+    assert result == "a\tNullable(Int64)\n" "b\tNullable(Int64)\n"
+    azure_query(
+        node,
+        f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference3.jsonl', '{account_name}', '{account_key}', 'CSV', 'auto', 's String') VALUES ('Error')",
+    )
+
+    error = azure_query(
+        node,
+        f"desc azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference*.jsonl', '{account_name}', '{account_key}', 'auto', 'auto', 'auto') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
+        expect_error="true",
+    )
+    assert "Cannot extract table structure" in error
+
+
 def test_schema_inference_cache(cluster):
     node = cluster.instances["node"]
     connection_string = cluster.env_variables["AZURITE_CONNECTION_STRING"]
@@ -1157,60 +1216,3 @@ def test_filtering_by_file_or_path(cluster):
     )
 
     assert int(result) == 1
-
-
-def test_union_schema_inference_mode(cluster):
-    node = cluster.instances["node"]
-    azure_query(
-        node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference1.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') select 1 as a",
-    )
-
-    azure_query(
-        node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference2.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') select 2 as b",
-    )
-
-    node.query("system drop schema cache for azure")
-
-    result = azure_query(
-        node,
-        "desc azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference*.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
-    )
-    assert result == "a\tNullable(Int64)\nb\tNullable(Int64)\n"
-
-    result = node.query(
-        "select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache where source like '%test_union_schema_inference%' order by file format TSV"
-    )
-    assert (
-        result == "UNION\ttest_union_schema_inference1.jsonl\ta Nullable(Int64)\n"
-        "UNION\ttest_union_schema_inference2.jsonl\tb Nullable(Int64)\n"
-    )
-    result = azure_query(
-        node,
-        "select * from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference*.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') order by tuple(*) settings schema_inference_mode='union', describe_compact_output=1 format TSV",
-    )
-    assert result == "1\t\\N\n" "\\N\t2\n"
-    node.query(f"system drop schema cache for hdfs")
-    result = azure_query(
-        node,
-        "desc azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference2.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
-    )
-    assert result == "b\tNullable(Int64)\n"
-
-    result = azure_query(
-        node,
-        "desc azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference*.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
-    )
-    assert result == "a\tNullable(Int64)\n" "b\tNullable(Int64)\n"
-    azure_query(
-        node,
-        "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference3.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', TSV) select 'Error'",
-    )
-
-    error = azure_query(
-        node,
-        "desc azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_union_schema_inference*.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') settings schema_inference_mode='union', describe_compact_output=1 format TSV",
-        expect_error="true",
-    )
-    assert "Cannot extract table structure" in error

From 6345d94d2bde4de879e27b29920f680f5314db65 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Fri, 3 Nov 2023 16:27:04 +0000
Subject: [PATCH 014/213] Fix test

---
 tests/integration/test_storage_azure_blob_storage/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py
index 3b4a5bf571b..6c72dece5d8 100644
--- a/tests/integration/test_storage_azure_blob_storage/test.py
+++ b/tests/integration/test_storage_azure_blob_storage/test.py
@@ -942,7 +942,7 @@ def test_union_schema_inference_mode(cluster):
     )
     result = azure_query(
         node,
-        f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference*.jsonl', '{account_name}', '{account_key}', 'auto', 'auto', 'auto') settings schema_inference_mode='union' format TSV",
+        f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference*.jsonl', '{account_name}', '{account_key}', 'auto', 'auto', 'auto') order by tuple(*) settings schema_inference_mode='union' format TSV",
     )
     assert result == "1\t\\N\n" "\\N\t2\n"
     node.query(f"system drop schema cache for hdfs")

From 081fa9f3def0bdcc680581ba4b8e5f96ccc2d828 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Mon, 20 Nov 2023 15:53:28 +0000
Subject: [PATCH 015/213] Address comments

---
 src/Core/SettingsChangesHistory.h    |  2 +-
 src/Formats/ReadSchemaUtils.cpp      |  9 ++++++++-
 src/Formats/ReadSchemaUtils.h        | 17 ++++++++++++-----
 src/Formats/SchemaInferenceUtils.cpp |  2 +-
 4 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index caa1b28b1c5..c55c0ece15e 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -80,7 +80,7 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
 {
-    {"23.10", {{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
+    {"23.11", {{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
               {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},
               {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}},
     {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"},
diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp
index c5630267e3f..43931be3449 100644
--- a/src/Formats/ReadSchemaUtils.cpp
+++ b/src/Formats/ReadSchemaUtils.cpp
@@ -55,7 +55,14 @@ try
     NamesAndTypesList names_and_types;
     SchemaInferenceMode mode = context->getSettingsRef().schema_inference_mode;
     if (mode == SchemaInferenceMode::UNION && !FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context, format_settings))
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION schema inference mode is not supported for format {}, because it doesn't support reading subset of columns", format_name);
+    {
+        String additional_message;
+        /// Better exception message for WithNames(AndTypes) formats.
+        if (format_name.ends_with("WithNames") || format_name.ends_with("WithNamesAndTypes"))
+            additional_message = " (formats -WithNames(AndTypes) support reading subset of columns only when setting input_format_with_names_use_header is enabled)";
+
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION schema inference mode is not supported for format {}, because it doesn't support reading subset of columns{}", format_name, additional_message);
+    }
 
     if (FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format_name))
     {
diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h
index aade6b28fb0..6aa8f3f9c4c 100644
--- a/src/Formats/ReadSchemaUtils.h
+++ b/src/Formats/ReadSchemaUtils.h
@@ -57,11 +57,18 @@ private:
 /// use it and won't create a read buffer.
 /// For formats that have a schema reader from the data,
 /// read buffer will be created by the provided iterator and
-/// the schema will be extracted from the data. If schema reader
-/// couldn't determine the schema we will try the next read buffer
-/// from the provided iterator if it makes sense. If the format doesn't
-/// have any schema reader or we couldn't determine the schema,
-/// an exception will be thrown.
+/// the schema will be extracted from the data. If the format doesn't
+/// have any schema reader, an exception will be thrown.
+/// Reading schema can be performed in 2 modes depending on setting schema_inference_mode:
+/// 1) Default mode. In this mode ClickHouse assumes that all files have the same schema
+/// and tries to infer the schema by reading files one by one until it succeeds.
+/// If schema reader couldn't determine the schema for some file, ClickHouse will try the next
+/// file (next read buffer from the provided iterator) if it makes sense. If ClickHouse couldn't determine
+/// the resulting schema, an exception will be thrown.
+/// 2) Union mode. In this mode ClickHouse assumes that files can have different schemas,
+/// so it infers the schemas of all files and then unions them into a common schema. In this mode
+/// all read buffers from the provided iterator will be used. If ClickHouse couldn't determine
+/// the schema for some file, an exception will be thrown.
 ColumnsDescription readSchemaFromFormat(
     const String & format_name,
     const std::optional<FormatSettings> & format_settings,
diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp
index 13871904c56..229f033f65a 100644
--- a/src/Formats/SchemaInferenceUtils.cpp
+++ b/src/Formats/SchemaInferenceUtils.cpp
@@ -577,7 +577,7 @@ namespace
         element_types.reserve(names_to_types.size());
         for (const auto & name : element_names)
         {
-            auto types = names_to_types[name];
+            auto & types = names_to_types[name];
             transformInferredTypesIfNeededImpl<true>(types, settings, json_info);
             /// If some element have different types in different tuples, we can't do anything
             if (!checkIfTypesAreEqual(types))

From f77c770dc9ffa9e54356bcdece54cb5c9d2f2985 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Mon, 20 Nov 2023 15:55:11 +0000
Subject: [PATCH 016/213] Don't run test in parallel

---
 .../queries/0_stateless/02375_system_schema_inference_cache.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02375_system_schema_inference_cache.sql b/tests/queries/0_stateless/02375_system_schema_inference_cache.sql
index 6f656b16c69..310e22ed31f 100644
--- a/tests/queries/0_stateless/02375_system_schema_inference_cache.sql
+++ b/tests/queries/0_stateless/02375_system_schema_inference_cache.sql
@@ -1,4 +1,4 @@
--- Tags: no-fasttest
+-- Tags: no-fasttest, no-parallel
 
 set input_format_json_try_infer_numbers_from_strings=1;
 insert into function file('02374_data1.jsonl') select number as x, 'str' as s from numbers(10);

From 68c72d7e65f2516c74693624d2826f9680d4d473 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Mon, 20 Nov 2023 16:47:52 +0000
Subject: [PATCH 017/213] Fix possible deadlock in Template format during sync
 after error

---
 src/IO/ReadHelpers.cpp                        |  2 +-
 .../02918_template_format_deadlock.reference  |  1 +
 .../02918_template_format_deadlock.sh         | 19 +++++++++++++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02918_template_format_deadlock.reference
 create mode 100755 tests/queries/0_stateless/02918_template_format_deadlock.sh

diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp
index 19750906fdb..ff5743a63af 100644
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@@ -1591,7 +1591,7 @@ void skipToNextRowOrEof(PeekableReadBuffer & buf, const String & row_after_delim
         if (skip_spaces)
             skipWhitespaceIfAny(buf);
 
-        if (checkString(row_between_delimiter, buf))
+        if (buf.eof() || checkString(row_between_delimiter, buf))
             break;
     }
 }
diff --git a/tests/queries/0_stateless/02918_template_format_deadlock.reference b/tests/queries/0_stateless/02918_template_format_deadlock.reference
new file mode 100644
index 00000000000..83f5da32cc7
--- /dev/null
+++ b/tests/queries/0_stateless/02918_template_format_deadlock.reference
@@ -0,0 +1 @@
+42	43
diff --git a/tests/queries/0_stateless/02918_template_format_deadlock.sh b/tests/queries/0_stateless/02918_template_format_deadlock.sh
new file mode 100755
index 00000000000..344a8b55b0d
--- /dev/null
+++ b/tests/queries/0_stateless/02918_template_format_deadlock.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME
+TEMPLATE_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.template
+
+echo "42 | 43
+Error line" > $DATA_FILE
+echo '${a:CSV} | ${b:CSV}' > $TEMPLATE_FILE
+
+$CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', Template, 'a UInt32, b UInt32') settings format_template_row='$TEMPLATE_FILE', input_format_allow_errors_num=1"
+
+rm $DATA_FILE
+rm $TEMPLATE_FILE
+

From ffa90628f004449fccda70a07897f467ffbd8658 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Mon, 20 Nov 2023 17:22:38 +0000
Subject: [PATCH 018/213] Make input format errors logger a bit better

---
 src/Formats/FormatFactory.cpp                 |  8 +++-
 .../Formats/InputFormatErrorsLogger.cpp       | 45 +++++++++++++------
 .../Formats/InputFormatErrorsLogger.h         |  4 ++
 3 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 7fb355b6c43..c50fa1bcebd 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -346,7 +346,13 @@ InputFormatPtr FormatFactory::getInput(
     if (owned_buf)
         format->addBuffer(std::move(owned_buf));
     if (!settings.input_format_record_errors_file_path.toString().empty())
-        format->setErrorsLogger(std::make_shared<ParallelInputFormatErrorsLogger>(context));
+    {
+        if (parallel_parsing)
+            format->setErrorsLogger(std::make_shared<ParallelInputFormatErrorsLogger>(context));
+        else
+            format->setErrorsLogger(std::make_shared<InputFormatErrorsLogger>(context));
+    }
+
 
     /// It's a kludge. Because I cannot remove context from values format.
     /// (Not needed in the parallel_parsing case above because VALUES format doesn't support it.)
diff --git a/src/Processors/Formats/InputFormatErrorsLogger.cpp b/src/Processors/Formats/InputFormatErrorsLogger.cpp
index 71d51f0e04a..814c4679cf9 100644
--- a/src/Processors/Formats/InputFormatErrorsLogger.cpp
+++ b/src/Processors/Formats/InputFormatErrorsLogger.cpp
@@ -20,7 +20,7 @@ namespace
     const String DEFAULT_OUTPUT_FORMAT = "CSV";
 }
 
-InputFormatErrorsLogger::InputFormatErrorsLogger(const ContextPtr & context)
+InputFormatErrorsLogger::InputFormatErrorsLogger(const ContextPtr & context) : max_block_size(context->getSettingsRef().max_block_size)
 {
     String output_format = context->getSettingsRef().errors_output_format;
     if (!FormatFactory::instance().isOutputFormat(output_format))
@@ -59,30 +59,47 @@ InputFormatErrorsLogger::InputFormatErrorsLogger(const ContextPtr & context)
         {std::make_shared<DataTypeUInt32>(), "offset"},
         {std::make_shared<DataTypeString>(), "reason"},
         {std::make_shared<DataTypeString>(), "raw_data"}};
+    errors_columns = header.cloneEmptyColumns();
 
     writer = context->getOutputFormat(output_format, *write_buf, header);
 }
 
+
 InputFormatErrorsLogger::~InputFormatErrorsLogger()
 {
-    writer->finalize();
-    writer->flush();
-    write_buf->finalize();
+    try
+    {
+        if (!errors_columns[0]->empty())
+            writeErrors();
+        writer->finalize();
+        writer->flush();
+        write_buf->finalize();
+    }
+    catch (...)
+    {
+        tryLogCurrentException("InputFormatErrorsLogger");
+    }
 }
 
 void InputFormatErrorsLogger::logErrorImpl(ErrorEntry entry)
 {
-    auto error = header.cloneEmpty();
-    auto columns = error.mutateColumns();
-    columns[0]->insert(entry.time);
-    database.empty() ? columns[1]->insertDefault() : columns[1]->insert(database);
-    table.empty() ? columns[2]->insertDefault() : columns[2]->insert(table);
-    columns[3]->insert(entry.offset);
-    columns[4]->insert(entry.reason);
-    columns[5]->insert(entry.raw_data);
-    error.setColumns(std::move(columns));
+    errors_columns[0]->insert(entry.time);
+    database.empty() ? errors_columns[1]->insertDefault() : errors_columns[1]->insert(database);
+    table.empty() ? errors_columns[2]->insertDefault() : errors_columns[2]->insert(table);
+    errors_columns[3]->insert(entry.offset);
+    errors_columns[4]->insert(entry.reason);
+    errors_columns[5]->insert(entry.raw_data);
 
-    writer->write(error);
+    if (errors_columns[0]->size() >= max_block_size)
+        writeErrors();
+}
+
+void InputFormatErrorsLogger::writeErrors()
+{
+    auto block = header.cloneEmpty();
+    block.setColumns(std::move(errors_columns));
+    writer->write(block);
+    errors_columns = header.cloneEmptyColumns();
 }
 
 void InputFormatErrorsLogger::logError(ErrorEntry entry)
diff --git a/src/Processors/Formats/InputFormatErrorsLogger.h b/src/Processors/Formats/InputFormatErrorsLogger.h
index 4b3766f4d37..b30246ff705 100644
--- a/src/Processors/Formats/InputFormatErrorsLogger.h
+++ b/src/Processors/Formats/InputFormatErrorsLogger.h
@@ -24,6 +24,7 @@ public:
 
     virtual void logError(ErrorEntry entry);
     void logErrorImpl(ErrorEntry entry);
+    void writeErrors();
 
 private:
     Block header;
@@ -34,6 +35,9 @@ private:
 
     String database;
     String table;
+
+    MutableColumns errors_columns;
+    size_t max_block_size;
 };
 
 using InputFormatErrorsLoggerPtr = std::shared_ptr<InputFormatErrorsLogger>;

From 6d3de41dc2a7545f93331c5f7721c428dfbea4d8 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Mon, 20 Nov 2023 18:07:54 +0000
Subject: [PATCH 019/213] Fix skipping lots of rows with errors

---
 src/Processors/Formats/IRowInputFormat.cpp    |  2 +-
 .../Impl/ParallelParsingInputFormat.cpp       | 83 ++++++++++---------
 ...2919_skip_lots_of_parsing_errors.reference |  4 +
 .../02919_skip_lots_of_parsing_errors.sh      | 23 +++++
 4 files changed, 71 insertions(+), 41 deletions(-)
 create mode 100644 tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.reference
 create mode 100755 tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.sh

diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp
index 87febb6d46b..8c563b6f13b 100644
--- a/src/Processors/Formats/IRowInputFormat.cpp
+++ b/src/Processors/Formats/IRowInputFormat.cpp
@@ -128,7 +128,7 @@ Chunk IRowInputFormat::generate()
 
         RowReadExtension info;
         bool continue_reading = true;
-        for (size_t rows = 0; rows < params.max_block_size && continue_reading; ++rows)
+        for (size_t rows = 0; (rows < params.max_block_size || num_rows == 0) && continue_reading; ++rows)
         {
             try
             {
diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp
index 06d5c80281f..8d0f7ef984a 100644
--- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp
@@ -125,10 +125,6 @@ void ParallelParsingInputFormat::parserThreadFunction(ThreadGroupPtr thread_grou
             first_parser_finished.set();
         }
 
-        // We suppose we will get at least some blocks for a non-empty buffer,
-        // except at the end of file. Also see a matching assert in readImpl().
-        assert(unit.is_last || !unit.chunk_ext.chunk.empty() || parsing_finished);
-
         std::lock_guard<std::mutex> lock(mutex);
         unit.status = READY_TO_READ;
         reader_condvar.notify_all();
@@ -199,62 +195,69 @@ Chunk ParallelParsingInputFormat::generate()
     }
 
     const auto inserter_unit_number = reader_ticket_number % processing_units.size();
-    auto & unit = processing_units[inserter_unit_number];
+    auto * unit = &processing_units[inserter_unit_number];
 
     if (!next_block_in_current_unit.has_value())
     {
-        // We have read out all the Blocks from the previous Processing Unit,
-        // wait for the current one to become ready.
-        std::unique_lock<std::mutex> lock(mutex);
-        reader_condvar.wait(lock, [&](){ return unit.status == READY_TO_READ || parsing_finished; });
-
-        if (parsing_finished)
+        while (true)
         {
-            /**
-              * Check for background exception and rethrow it before we return.
-              */
-            if (background_exception)
+            // We have read out all the Blocks from the previous Processing Unit,
+            // wait for the current one to become ready.
+            std::unique_lock<std::mutex> lock(mutex);
+            reader_condvar.wait(lock, [&]() { return unit->status == READY_TO_READ || parsing_finished; });
+
+            if (parsing_finished)
             {
-                lock.unlock();
-                cancel();
-                std::rethrow_exception(background_exception);
+                /// Check for background exception and rethrow it before we return.
+                if (background_exception)
+                {
+                    lock.unlock();
+                    cancel();
+                    std::rethrow_exception(background_exception);
+                }
+
+                return {};
             }
 
-            return {};
+            assert(unit->status == READY_TO_READ);
+
+            if (!unit->chunk_ext.chunk.empty())
+                break;
+
+            /// If this unit is last, parsing is finished.
+            if (unit->is_last)
+            {
+                parsing_finished = true;
+                return {};
+            }
+
+            /// We can get zero blocks for an entire segment if format parser
+            /// skipped all rows. For example, it can happen while using settings
+            /// input_format_allow_errors_num/input_format_allow_errors_ratio
+            /// and this segment contained only rows with errors.
+            /// Process the next unit.
+            ++reader_ticket_number;
+            unit = &processing_units[reader_ticket_number % processing_units.size()];
         }
 
-        assert(unit.status == READY_TO_READ);
         next_block_in_current_unit = 0;
     }
 
-    if (unit.chunk_ext.chunk.empty())
-    {
-        /*
-         * Can we get zero blocks for an entire segment, when the format parser
-         * skips it entire content and does not create any blocks? Probably not,
-         * but if we ever do, we should add a loop around the above if, to skip
-         * these. Also see a matching assert in the parser thread.
-         */
-        assert(unit.is_last);
-        parsing_finished = true;
-        return {};
-    }
+    assert(next_block_in_current_unit.value() < unit->chunk_ext.chunk.size());
 
-    assert(next_block_in_current_unit.value() < unit.chunk_ext.chunk.size());
-
-    Chunk res = std::move(unit.chunk_ext.chunk.at(*next_block_in_current_unit));
-    last_block_missing_values = std::move(unit.chunk_ext.block_missing_values[*next_block_in_current_unit]);
-    last_approx_bytes_read_for_chunk = unit.chunk_ext.approx_chunk_sizes.at(*next_block_in_current_unit);
+    Chunk res = std::move(unit->chunk_ext.chunk.at(*next_block_in_current_unit));
+    last_block_missing_values = std::move(unit->chunk_ext.block_missing_values[*next_block_in_current_unit]);
+    last_approx_bytes_read_for_chunk = unit->chunk_ext.approx_chunk_sizes.at(*next_block_in_current_unit);
 
     next_block_in_current_unit.value() += 1;
 
-    if (*next_block_in_current_unit == unit.chunk_ext.chunk.size())
+    if (*next_block_in_current_unit == unit->chunk_ext.chunk.size())
     {
         // parsing_finished reading this Processing Unit, move to the next one.
         next_block_in_current_unit.reset();
         ++reader_ticket_number;
 
-        if (unit.is_last)
+        if (unit->is_last)
         {
             // It it was the last unit, we're parsing_finished.
             parsing_finished = true;
@@ -263,7 +266,7 @@ Chunk ParallelParsingInputFormat::generate()
         {
             // Pass the unit back to the segmentator.
             std::lock_guard lock(mutex);
-            unit.status = READY_TO_INSERT;
+            unit->status = READY_TO_INSERT;
             segmentator_condvar.notify_all();
         }
     }
diff --git a/tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.reference b/tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.reference
new file mode 100644
index 00000000000..4b4c9812f09
--- /dev/null
+++ b/tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.reference
@@ -0,0 +1,4 @@
+42
+100000
+42
+100000
diff --git a/tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.sh b/tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.sh
new file mode 100755
index 00000000000..2c54e9e68da
--- /dev/null
+++ b/tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+FILE=$CLICKHOUSE_TEST_UNIQUE_NAME
+ERRORS_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.errors
+
+$CLICKHOUSE_LOCAL -q "select 'Error' from numbers(100000) format TSVRaw" > $FILE
+echo -e "42" >> $FILE
+
+$CLICKHOUSE_LOCAL -q "select * from file('$FILE', CSV, 'x UInt32') settings input_format_allow_errors_ratio=1, max_block_size=10000, input_format_parallel_parsing=0, input_format_record_errors_file_path='$ERRORS_FILE'";
+$CLICKHOUSE_LOCAL -q "select count() from file('$ERRORS_FILE', CSV)"
+rm $ERRORS_FILE
+
+$CLICKHOUSE_LOCAL -q "select * from file('$FILE', CSV, 'x UInt32') settings input_format_allow_errors_ratio=1, max_block_size=10000, input_format_parallel_parsing=1, input_format_record_errors_file_path='$ERRORS_FILE'";
+$CLICKHOUSE_LOCAL -q "select count() from file('$ERRORS_FILE', CSV)"
+rm $ERRORS_FILE
+
+rm $FILE
+

From 42e16bcc49dad255acfd1d3dce14e6a0e854b55b Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Tue, 21 Nov 2023 13:32:39 +0100
Subject: [PATCH 020/213] SLRU for filesystem cache

---
 .../IO/CachedOnDiskReadBufferFromFile.cpp     |   4 +-
 src/Interpreters/Cache/FileCache.cpp          | 214 +++------------
 src/Interpreters/Cache/FileCache.h            |  35 ++-
 src/Interpreters/Cache/FileCacheSettings.cpp  |   6 +
 src/Interpreters/Cache/FileCacheSettings.h    |   3 +
 src/Interpreters/Cache/FileSegment.cpp        |   4 +-
 src/Interpreters/Cache/FileSegment.h          |   2 +-
 src/Interpreters/Cache/IFileCachePriority.cpp | 104 ++++++++
 src/Interpreters/Cache/IFileCachePriority.h   |  87 +++---
 .../Cache/LRUFileCachePriority.cpp            | 137 ++++++++--
 src/Interpreters/Cache/LRUFileCachePriority.h |  45 ++--
 .../Cache/SLRUFileCachePriority.cpp           | 252 ++++++++++++++++++
 .../Cache/SLRUFileCachePriority.h             |  88 ++++++
 13 files changed, 714 insertions(+), 267 deletions(-)
 create mode 100644 src/Interpreters/Cache/IFileCachePriority.cpp
 create mode 100644 src/Interpreters/Cache/SLRUFileCachePriority.cpp
 create mode 100644 src/Interpreters/Cache/SLRUFileCachePriority.h

diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp
index 1cfdd96b271..16e2233d596 100644
--- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp
+++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp
@@ -516,7 +516,7 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext()
         return false;
 
     current_file_segment = &file_segments->front();
-    current_file_segment->use();
+    current_file_segment->increasePriority();
     implementation_buffer = getImplementationBuffer(*current_file_segment);
 
     LOG_TEST(
@@ -842,7 +842,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
     else
     {
         implementation_buffer = getImplementationBuffer(file_segments->front());
-        file_segments->front().use();
+        file_segments->front().increasePriority();
     }
 
     chassert(!internal_buffer.empty());
diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index e4d4802951a..8f75fb6f0b9 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -7,6 +7,7 @@
 #include <IO/WriteBufferFromString.h>
 #include <Interpreters/Cache/FileCacheSettings.h>
 #include <Interpreters/Cache/LRUFileCachePriority.h>
+#include <Interpreters/Cache/SLRUFileCachePriority.h>
 #include <Interpreters/Context.h>
 #include <base/hex.h>
 #include <Common/ThreadPool.h>
@@ -62,7 +63,12 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s
     , log(&Poco::Logger::get("FileCache(" + cache_name + ")"))
     , metadata(settings.base_path)
 {
-    main_priority = std::make_unique<LRUFileCachePriority>(settings.max_size, settings.max_elements);
+    if (settings.cache_policy == "LRU")
+        main_priority = std::make_unique<LRUFileCachePriority>(settings.max_size, settings.max_elements);
+    else if (settings.cache_policy == "SLRU")
+        main_priority = std::make_unique<SLRUFileCachePriority>(settings.max_size, settings.max_elements, settings.slru_size_ratio);
+    else
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown cache policy: {}", settings.cache_policy);
 
     if (settings.cache_hits_threshold)
         stash = std::make_unique<HitsCountStash>(settings.cache_hits_threshold, settings.max_elements);
@@ -511,7 +517,7 @@ KeyMetadata::iterator FileCache::addFileSegment(
         }
         else
         {
-            result_state = record_it->second->use(*lock) >= stash->hits_threshold
+            result_state = record_it->second->increasePriority(*lock) >= stash->hits_threshold
                 ? FileSegment::State::EMPTY
                 : FileSegment::State::DETACHED;
         }
@@ -572,171 +578,18 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa
             file_segment.key(), file_segment.offset());
     }
 
-    struct EvictionCandidates
-    {
-        explicit EvictionCandidates(KeyMetadataPtr key_metadata_) : key_metadata(std::move(key_metadata_)) {}
-
-        void add(const FileSegmentMetadataPtr & candidate)
-        {
-            candidate->removal_candidate = true;
-            candidates.push_back(candidate);
-        }
-
-        ~EvictionCandidates()
-        {
-            /// If failed to reserve space, we don't delete the candidates but drop the flag instead
-            /// so the segments can be used again
-            for (const auto & candidate : candidates)
-                candidate->removal_candidate = false;
-        }
-
-        KeyMetadataPtr key_metadata;
-        std::vector<FileSegmentMetadataPtr> candidates;
-    };
-
-    std::unordered_map<Key, EvictionCandidates> to_delete;
-    size_t freeable_space = 0, freeable_count = 0;
-
-    auto iterate_func = [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
-    {
-        chassert(segment_metadata->file_segment->assertCorrectness());
-
-        auto & stat_by_kind = reserve_stat.stat_by_kind[segment_metadata->file_segment->getKind()];
-        if (segment_metadata->releasable())
-        {
-            const auto & key = segment_metadata->file_segment->key();
-            auto it = to_delete.find(key);
-            if (it == to_delete.end())
-                it = to_delete.emplace(key, locked_key.getKeyMetadata()).first;
-            it->second.add(segment_metadata);
-
-            stat_by_kind.releasable_size += segment_metadata->size();
-            ++stat_by_kind.releasable_count;
-
-            freeable_space += segment_metadata->size();
-            ++freeable_count;
-        }
-        else
-        {
-            stat_by_kind.non_releasable_size += segment_metadata->size();
-            ++stat_by_kind.non_releasable_count;
-
-            ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionSkippedFileSegments);
-        }
-
-        return PriorityIterationResult::CONTINUE;
-    };
+    IFileCachePriority::EvictionCandidates eviction_candidates;
+    IFileCachePriority::FinalizeEvictionFunc finalize_eviction_func;
 
     if (query_priority)
     {
-        auto is_query_priority_overflow = [&]
-        {
-            const size_t new_size = query_priority->getSize(cache_lock) + size - freeable_space;
-            return new_size > query_priority->getSizeLimit();
-        };
-
-        if (is_query_priority_overflow())
-        {
-            ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionTries);
-
-            query_priority->iterate(
-                [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
-                { return is_query_priority_overflow() ? iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; },
-                cache_lock);
-
-            if (is_query_priority_overflow())
-                return false;
-        }
-
-        LOG_TEST(
-            log, "Query limits satisfied (while reserving for {}:{})",
-            file_segment.key(), file_segment.offset());
-    }
-
-    auto is_main_priority_overflow = [main_priority_size_limit = main_priority->getSizeLimit(),
-                                      main_priority_elements_limit = main_priority->getElementsLimit(),
-                                      size,
-                                      &freeable_space,
-                                      &freeable_count,
-                                      &file_segment,
-                                      &cache_lock,
-                                      my_main_priority = this->main_priority.get(),
-                                      my_log = this->log]
-    {
-        const bool is_overflow =
-            /// size_limit == 0 means unlimited cache size
-            (main_priority_size_limit != 0 && (my_main_priority->getSize(cache_lock) + size - freeable_space > main_priority_size_limit))
-            /// elements_limit == 0 means unlimited number of cache elements
-            || (main_priority_elements_limit != 0 && freeable_count == 0
-                && my_main_priority->getElementsCount(cache_lock) == main_priority_elements_limit);
-
-        LOG_TEST(
-            my_log, "Overflow: {}, size: {}, ready to remove: {} ({} in number), current cache size: {}/{}, elements: {}/{}, while reserving for {}:{}",
-            is_overflow, size, freeable_space, freeable_count,
-            my_main_priority->getSize(cache_lock), my_main_priority->getSizeLimit(),
-            my_main_priority->getElementsCount(cache_lock), my_main_priority->getElementsLimit(),
-            file_segment.key(), file_segment.offset());
-
-        return is_overflow;
-    };
-
-    /// If we have enough space in query_priority, we are not interested about stat there anymore.
-    /// Clean the stat before iterating main_priority to avoid calculating any segment stat twice.
-    reserve_stat.stat_by_kind.clear();
-
-    if (is_main_priority_overflow())
-    {
-        ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionTries);
-
-        main_priority->iterate(
-            [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
-            { return is_main_priority_overflow() ? iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; },
-            cache_lock);
-
-        if (is_main_priority_overflow())
+        if (!query_priority->collectCandidatesForEviction(size, reserve_stat, eviction_candidates, {}, finalize_eviction_func, cache_lock))
             return false;
-    }
 
-    if (!file_segment.getKeyMetadata()->createBaseDirectory())
-        return false;
-
-    if (!to_delete.empty())
-    {
-        LOG_DEBUG(
-            log, "Will evict {} file segments (while reserving {} bytes for {}:{})",
-            to_delete.size(), size, file_segment.key(), file_segment.offset());
-
-        ProfileEventTimeIncrement<Microseconds> evict_watch(ProfileEvents::FilesystemCacheEvictMicroseconds);
-
-        for (auto & [current_key, deletion_info] : to_delete)
-        {
-            auto locked_key = deletion_info.key_metadata->tryLock();
-            if (!locked_key)
-                continue; /// key could become invalid after we released the key lock above, just skip it.
-
-            /// delete from vector in reverse order just for efficiency
-            auto & candidates = deletion_info.candidates;
-            while (!candidates.empty())
-            {
-                auto & candidate = candidates.back();
-                chassert(candidate->releasable());
-
-                const auto * segment = candidate->file_segment.get();
-                auto queue_it = segment->getQueueIterator();
-                chassert(queue_it);
-
-                ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments);
-                ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size());
-
-                locked_key->removeFileSegment(segment->offset(), segment->lock());
-                queue_it->remove(cache_lock);
-
-                if (query_context)
-                    query_context->remove(current_key, segment->offset(), cache_lock);
-
-                candidates.pop_back();
-            }
-        }
+        LOG_TEST(log, "Query limits satisfied (while reserving for {}:{})", file_segment.key(), file_segment.offset());
+        /// If we have enough space in query_priority, we are no longer interested in the stat there.
+        /// Clean the stat before iterating main_priority to avoid calculating any segment stat twice.
+        reserve_stat.stat_by_kind.clear();
     }
 
     /// A file_segment_metadata acquires a LRUQueue iterator on first successful space reservation attempt,
@@ -744,6 +597,17 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa
     auto queue_iterator = file_segment.getQueueIterator();
     chassert(!queue_iterator || file_segment.getReservedSize() > 0);
 
+    if (!main_priority->collectCandidatesForEviction(size, reserve_stat, eviction_candidates, queue_iterator, finalize_eviction_func, cache_lock))
+        return false;
+
+    if (!file_segment.getKeyMetadata()->createBaseDirectory())
+        return false;
+
+    eviction_candidates.evict(cache_lock);
+
+    if (finalize_eviction_func)
+        finalize_eviction_func();
+
     if (queue_iterator)
     {
         queue_iterator->updateSize(size);
@@ -812,8 +676,7 @@ void FileCache::removeAllReleasable()
     {
         /// Remove all access information.
         auto lock = lockCache();
-        stash->records.clear();
-        stash->queue->removeAll(lock);
+        stash->clear();
     }
 }
 
@@ -1115,15 +978,7 @@ FileSegments FileCache::getSnapshot(const Key & key)
 FileSegments FileCache::dumpQueue()
 {
     assertInitialized();
-
-    FileSegments file_segments;
-    main_priority->iterate([&](LockedKey &, const FileSegmentMetadataPtr & segment_metadata)
-    {
-        file_segments.push_back(FileSegment::getSnapshot(segment_metadata->file_segment));
-        return PriorityIterationResult::CONTINUE;
-    }, lockCache());
-
-    return file_segments;
+    return main_priority->dump(lockCache());
 }
 
 std::vector<String> FileCache::tryGetCachePaths(const Key & key)
@@ -1210,4 +1065,17 @@ FileSegments FileCache::sync()
     return file_segments;
 }
 
+FileCache::HitsCountStash::HitsCountStash(size_t hits_threashold_, size_t queue_size_)
+    : hits_threshold(hits_threashold_), queue_size(queue_size_), queue(std::make_unique<LRUFileCachePriority>(0, queue_size_))
+{
+    if (!queue_size_)
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Queue size for hits queue must be non-zero");
+}
+
+void FileCache::HitsCountStash::clear()
+{
+    records.clear();
+    queue = std::make_unique<LRUFileCachePriority>(0, queue_size);
+}
+
 }
diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h
index d7295868a0c..7c65c112869 100644
--- a/src/Interpreters/Cache/FileCache.h
+++ b/src/Interpreters/Cache/FileCache.h
@@ -40,7 +40,29 @@ struct FileCacheReserveStat
         size_t non_releasable_count;
     };
 
+    Stat stat;
     std::unordered_map<FileSegmentKind, Stat> stat_by_kind;
+
+    void update(size_t size, FileSegmentKind kind, bool releasable)
+    {
+        auto & local_stat = stat_by_kind[kind];
+        if (releasable)
+        {
+            stat.releasable_size += size;
+            ++stat.releasable_count;
+
+            local_stat.releasable_size += size;
+            ++local_stat.releasable_count;
+        }
+        else
+        {
+            stat.non_releasable_size += size;
+            ++stat.non_releasable_count;
+
+            local_stat.non_releasable_size += size;
+            ++local_stat.non_releasable_count;
+        }
+    }
 };
 
 /// Local cache for remote filesystem files, represented as a set of non-overlapping non-empty file segments.
@@ -53,7 +75,6 @@ public:
     using Priority = IFileCachePriority;
     using PriorityEntry = IFileCachePriority::Entry;
     using PriorityIterator = IFileCachePriority::Iterator;
-    using PriorityIterationResult = IFileCachePriority::IterationResult;
 
     FileCache(const std::string & cache_name, const FileCacheSettings & settings);
 
@@ -172,15 +193,13 @@ private:
 
     struct HitsCountStash
     {
-        HitsCountStash(size_t hits_threashold_, size_t queue_size_)
-            : hits_threshold(hits_threashold_), queue(std::make_unique<LRUFileCachePriority>(0, queue_size_))
-        {
-            if (!queue_size_)
-                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Queue size for hits queue must be non-zero");
-        }
+        HitsCountStash(size_t hits_threashold_, size_t queue_size_);
+        void clear();
 
         const size_t hits_threshold;
-        FileCachePriorityPtr queue;
+        const size_t queue_size;
+
+        std::unique_ptr<LRUFileCachePriority> queue;
         using Records = std::unordered_map<KeyAndOffset, PriorityIterator, FileCacheKeyAndOffsetHash>;
         Records records;
     };
diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp
index 2ac54006a8e..4fe29e241a4 100644
--- a/src/Interpreters/Cache/FileCacheSettings.cpp
+++ b/src/Interpreters/Cache/FileCacheSettings.cpp
@@ -58,6 +58,12 @@ void FileCacheSettings::loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetStrin
 
     if (has("load_metadata_threads"))
         load_metadata_threads = get_uint("load_metadata_threads");
+
+    if (has("cache_policy"))
+        cache_policy = get_string("cache_policy");
+
+    // if (has("slru_size_ratio"))
+    //     slru_size_ratio = get_double("slru_size_ratio");
 }
 
 void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h
index bf0dd492bc9..bb9c4d17435 100644
--- a/src/Interpreters/Cache/FileCacheSettings.h
+++ b/src/Interpreters/Cache/FileCacheSettings.h
@@ -31,6 +31,9 @@ struct FileCacheSettings
 
     size_t load_metadata_threads = FILECACHE_DEFAULT_LOAD_METADATA_THREADS;
 
+    std::string cache_policy = "LRU";
+    double slru_size_ratio = 0.5;
+
     void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
     void loadFromCollection(const NamedCollection & collection);
 
diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp
index 9a0c1ac5654..8216d7a9a81 100644
--- a/src/Interpreters/Cache/FileSegment.cpp
+++ b/src/Interpreters/Cache/FileSegment.cpp
@@ -906,7 +906,7 @@ void FileSegment::detach(const FileSegmentGuard::Lock & lock, const LockedKey &)
     setDetachedState(lock);
 }
 
-void FileSegment::use()
+void FileSegment::increasePriority()
 {
     ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::FileSegmentUseMicroseconds);
 
@@ -920,7 +920,7 @@ void FileSegment::use()
     if (it)
     {
         auto cache_lock = cache->lockCache();
-        hits_count = it->use(cache_lock);
+        hits_count = it->increasePriority(cache_lock);
     }
 }
 
diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h
index 7f84a3ea177..d509b45e35c 100644
--- a/src/Interpreters/Cache/FileSegment.h
+++ b/src/Interpreters/Cache/FileSegment.h
@@ -209,7 +209,7 @@ public:
     /// is not going to be changed. Completed states: DOWNALODED, DETACHED.
     bool isCompleted(bool sync = false) const;
 
-    void use();
+    void increasePriority();
 
     /**
      * ========== Methods used by `cache` ========================
diff --git a/src/Interpreters/Cache/IFileCachePriority.cpp b/src/Interpreters/Cache/IFileCachePriority.cpp
new file mode 100644
index 00000000000..342a9589a1d
--- /dev/null
+++ b/src/Interpreters/Cache/IFileCachePriority.cpp
@@ -0,0 +1,104 @@
+#include <Interpreters/Cache/IFileCachePriority.h>
+#include <Interpreters/Cache/Metadata.h>
+#include <Common/CurrentMetrics.h>
+
+
+namespace CurrentMetrics
+{
+    extern const Metric FilesystemCacheSizeLimit;
+}
+
+namespace ProfileEvents
+{
+    extern const Event FilesystemCacheEvictMicroseconds;
+    extern const Event FilesystemCacheEvictedBytes;
+    extern const Event FilesystemCacheEvictedFileSegments;
+}
+
+namespace DB
+{
+
+/// Stores the limits and publishes max_size_ as the FilesystemCacheSizeLimit metric. NOTE(review): every queue constructed later overwrites that metric — confirm intended.
+IFileCachePriority::IFileCachePriority(size_t max_size_, size_t max_elements_) : max_size(max_size_), max_elements(max_elements_)
+{
+    CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, max_size_);
+}
+
+/// Describes one queue record: the segment's key and offset, the metadata of that key,
+/// and the segment size. `hits` keeps its in-class default.
+IFileCachePriority::Entry::Entry(
+    const Key & key_, size_t offset_, size_t size_, KeyMetadataPtr key_metadata_)
+    /// Initializers follow the member declaration order: key, offset, key_metadata, size.
+    : key(key_)
+    , offset(offset_)
+    , key_metadata(key_metadata_)
+    , size(size_)
+{
+}
+
+/// Hand-written copy constructor: std::atomic<size_t> is not copyable,
+/// so the size is transferred through an explicit load().
+IFileCachePriority::Entry::Entry(const Entry & other)
+    : key(other.key), offset(other.offset), key_metadata(other.key_metadata)
+    , size(other.size.load())
+    , hits(other.hits)
+{
+}
+
+IFileCachePriority::EvictionCandidates::~EvictionCandidates()
+{
+    /// Whatever is still listed here was not evicted (e.g. the space reservation failed),
+    /// so clear the removal mark and let those segments be used again.
+    for (const auto & [key, per_key] : candidates)
+    {
+        for (const auto & segment_metadata : per_key.candidates)
+            segment_metadata->removal_candidate = false;
+    }
+}
+
+void IFileCachePriority::EvictionCandidates::add(const KeyMetadataPtr & key, const FileSegmentMetadataPtr & candidate)
+{
+    /// Keep the key metadata with the group: evict() locks the key through it, so it must not stay null.
+    candidates.emplace(key->key, KeyCandidates{key, {}}).first->second.candidates.push_back(candidate);
+    candidate->removal_candidate = true;
+}
+
+/// Evicts all collected candidates: each segment is removed from its key and its entry from the priority queue.
+void IFileCachePriority::EvictionCandidates::evict(const CacheGuard::Lock & lock)
+{
+    if (candidates.empty())
+        return;
+    auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds);
+
+    for (auto & [key, key_candidates] : candidates)
+    {
+        auto locked_key = key_candidates.key_metadata->tryLock();
+        if (!locked_key)
+            continue; /// The key cannot be locked (presumably it was removed meanwhile): skip it, the destructor un-marks its candidates.
+
+        /// Evict from the back of the vector so that pop_back() can be used.
+        auto & to_evict = key_candidates.candidates;
+        while (!to_evict.empty())
+        {
+            auto & candidate = to_evict.back();
+            chassert(candidate->releasable());
+
+            const auto * segment = candidate->file_segment.get();
+            auto queue_it = segment->getQueueIterator();
+            chassert(queue_it);
+
+            ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments);
+            ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size());
+
+            locked_key->removeFileSegment(segment->offset(), segment->lock());
+            queue_it->remove(lock);
+
+            /// NOTE(review): the query-context cleanup is still disabled here — confirm whether it is needed:
+            ///     query_context->remove(current_key, segment->offset(), cache_lock);
+
+            to_evict.pop_back();
+        }
+    }
+}
+
+}
diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h
index 7de380c163b..da09e927840 100644
--- a/src/Interpreters/Cache/IFileCachePriority.h
+++ b/src/Interpreters/Cache/IFileCachePriority.h
@@ -1,7 +1,6 @@
 #pragma once
 
 #include <memory>
-#include <mutex>
 #include <Core/Types.h>
 #include <Common/Exception.h>
 #include <Interpreters/Cache/FileCacheKey.h>
@@ -10,6 +9,7 @@
 
 namespace DB
 {
+struct FileCacheReserveStat;
 
 /// IFileCachePriority is used to maintain the priority of cached data.
 class IFileCachePriority : private boost::noncopyable
@@ -20,53 +20,35 @@ public:
 
     struct Entry
     {
-        Entry(const Key & key_, size_t offset_, size_t size_, KeyMetadataPtr key_metadata_)
-            : key(key_), offset(offset_), size(size_), key_metadata(key_metadata_) {}
-
-        Entry(const Entry & other)
-            : key(other.key), offset(other.offset), size(other.size.load()), hits(other.hits), key_metadata(other.key_metadata) {}
+        Entry(const Key & key_, size_t offset_, size_t size_, KeyMetadataPtr key_metadata_);
+        Entry(const Entry & other);
 
         const Key key;
         const size_t offset;
+        const KeyMetadataPtr key_metadata;
+
         std::atomic<size_t> size;
         size_t hits = 0;
-        const KeyMetadataPtr key_metadata;
     };
 
-    /// Provides an iterator to traverse the cache priority. Under normal circumstances,
-    /// the iterator can only return the records that have been directly swapped out.
-    /// For example, in the LRU algorithm, it can traverse all records, but in the LRU-K, it
-    /// can only traverse the records in the low priority queue.
     class IIterator
     {
     public:
         virtual ~IIterator() = default;
 
-        virtual size_t use(const CacheGuard::Lock &) = 0;
+        virtual const Entry & getEntry() const = 0;
+
+        virtual size_t increasePriority(const CacheGuard::Lock &) = 0;
+
+        virtual void updateSize(int64_t size) = 0;
 
         virtual void remove(const CacheGuard::Lock &) = 0;
 
-        virtual const Entry & getEntry() const = 0;
-
-        virtual Entry & getEntry() = 0;
-
         virtual void invalidate() = 0;
-
-        virtual void updateSize(int64_t size) = 0;
     };
-
     using Iterator = std::shared_ptr<IIterator>;
-    using ConstIterator = std::shared_ptr<const IIterator>;
 
-    enum class IterationResult
-    {
-        BREAK,
-        CONTINUE,
-        REMOVE_AND_CONTINUE,
-    };
-    using IterateFunc = std::function<IterationResult(LockedKey &, const FileSegmentMetadataPtr &)>;
-
-    IFileCachePriority(size_t max_size_, size_t max_elements_) : max_size(max_size_), max_elements(max_elements_) {}
+    IFileCachePriority(size_t max_size_, size_t max_elements_);
 
     virtual ~IFileCachePriority() = default;
 
@@ -78,19 +60,46 @@ public:
 
     virtual size_t getElementsCount(const CacheGuard::Lock &) const = 0;
 
-    virtual Iterator add(
-        KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) = 0;
-
-    virtual void pop(const CacheGuard::Lock &) = 0;
-
-    virtual void removeAll(const CacheGuard::Lock &) = 0;
-
-    /// From lowest to highest priority.
-    virtual void iterate(IterateFunc && func, const CacheGuard::Lock &) = 0;
+    virtual Iterator add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) = 0;
 
     virtual void shuffle(const CacheGuard::Lock &) = 0;
 
-private:
+    virtual FileSegments dump(const CacheGuard::Lock &) = 0;
+
+    class EvictionCandidates
+    {
+    public:
+        ~EvictionCandidates();
+
+        void add(const KeyMetadataPtr & key, const FileSegmentMetadataPtr & candidate);
+
+        void evict(const CacheGuard::Lock &);
+
+        auto begin() const { return candidates.begin(); }
+        auto end() const { return candidates.end(); }
+
+    private:
+        struct KeyCandidates
+        {
+            KeyMetadataPtr key_metadata;
+            std::vector<FileSegmentMetadataPtr> candidates;
+        };
+
+        std::unordered_map<Key, KeyCandidates> candidates;
+    };
+
+    using EvictionCandidatesPtr = std::unique_ptr<EvictionCandidates>;
+    using FinalizeEvictionFunc = std::function<void()>;
+
+    virtual bool collectCandidatesForEviction(
+        size_t size,
+        FileCacheReserveStat & stat,
+        IFileCachePriority::EvictionCandidates & res,
+        IFileCachePriority::Iterator it,
+        FinalizeEvictionFunc & finalize_eviction_func,
+        const CacheGuard::Lock &) = 0;
+
+protected:
     const size_t max_size = 0;
     const size_t max_elements = 0;
 };
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index 5ecea95b1db..addbb55e22d 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -11,6 +11,12 @@ namespace CurrentMetrics
     extern const Metric FilesystemCacheElements;
 }
 
+namespace ProfileEvents
+{
+    extern const Event FilesystemCacheEvictionSkippedFileSegments;
+    extern const Event FilesystemCacheEvictionTries;
+}
+
 namespace DB
 {
 
@@ -68,21 +74,7 @@ IFileCachePriority::Iterator LRUFileCachePriority::add(
     return std::make_shared<LRUFileCacheIterator>(this, iter);
 }
 
-void LRUFileCachePriority::removeAll(const CacheGuard::Lock &)
-{
-    LOG_TEST(log, "Removed all entries from LRU queue");
-
-    updateSize(-current_size);
-    updateElementsCount(-current_elements_num);
-    queue.clear();
-}
-
-void LRUFileCachePriority::pop(const CacheGuard::Lock &)
-{
-    remove(queue.begin());
-}
-
-LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::remove(LRUQueueIterator it)
+LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::remove(LRUQueueIterator it, const CacheGuard::Lock &)
 {
     /// If size is 0, entry is invalidated, current_elements_num was already updated.
     if (it->size)
@@ -119,21 +111,21 @@ LRUFileCachePriority::LRUFileCacheIterator::LRUFileCacheIterator(
 {
 }
 
-void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock &)
+void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock & lock)
 {
     for (auto it = queue.begin(); it != queue.end();)
     {
         auto locked_key = it->key_metadata->tryLock();
         if (!locked_key || it->size == 0)
         {
-            it = remove(it);
+            it = remove(it, lock);
             continue;
         }
 
         auto metadata = locked_key->tryGetByOffset(it->offset);
         if (!metadata)
         {
-            it = remove(it);
+            it = remove(it, lock);
             continue;
         }
 
@@ -160,17 +152,115 @@ void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock &
             }
             case IterationResult::REMOVE_AND_CONTINUE:
             {
-                it = remove(it);
+                it = remove(it, lock);
                 break;
             }
         }
     }
 }
 
-void LRUFileCachePriority::LRUFileCacheIterator::remove(const CacheGuard::Lock &)
+/// Walks the queue from its head and collects releasable segments into `res` (accounting them in `stat`) until a
+/// request of `size` bytes no longer overflows the limits. Returns false right away when nothing overflows,
+/// otherwise returns whether the overflow is still there after the walk.
+/// NOTE(review): SLRUFileCachePriority treats a false result as a failure — confirm the intended polarity.
+bool LRUFileCachePriority::collectCandidatesForEviction(
+    size_t size,
+    FileCacheReserveStat & stat,
+    IFileCachePriority::EvictionCandidates & res,
+    IFileCachePriority::Iterator,
+    FinalizeEvictionFunc &,
+    const CacheGuard::Lock & lock)
+{
+    auto is_overflow = [&]
+    {
+        const bool size_overflow = max_size != 0 && (current_size + size - stat.stat.releasable_size > max_size);
+        /// The element limit only counts while no releasable segment has been found yet.
+        const bool elements_overflow = max_elements != 0 && stat.stat.releasable_count == 0 && current_elements_num == max_elements;
+        return size_overflow || elements_overflow;
+    };
+
+    if (!is_overflow())
+        return false;
+
+    ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionTries);
+
+    iterate([&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
+    {
+        /// Stop as soon as the collected candidates are enough.
+        if (!is_overflow())
+            return IterationResult::BREAK;
+
+        const auto & file_segment = segment_metadata->file_segment;
+        chassert(file_segment->assertCorrectness());
+
+        const bool releasable = segment_metadata->releasable();
+        if (releasable)
+            res.add(locked_key.getKeyMetadata(), segment_metadata);
+        stat.update(segment_metadata->size(), file_segment->getKind(), releasable);
+        if (!releasable)
+            ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionSkippedFileSegments);
+        return IterationResult::CONTINUE;
+    }, lock);
+
+    return is_overflow();
+}
+
+size_t LRUFileCachePriority::increasePriority(LRUQueueIterator it, const CacheGuard::Lock &)
+{
+    queue.splice(queue.end(), queue, it); /// Move the entry to the tail, i.e. the highest-priority position.
+    return ++it->hits; /// Returns the hit count after this access.
+}
+
+/// Transfers the entry `it` from `other` to the tail of this queue and moves its size/count accounting along.
+/// Returns an iterator to the entry in its new queue; throws LOGICAL_ERROR for an invalidated (zero-size) entry.
+LRUFileCachePriority::LRUQueueIterator
+LRUFileCachePriority::move(LRUQueueIterator it, LRUFileCachePriority & other, const CacheGuard::Lock &)
+{
+    const size_t size = it->size;
+    if (size == 0)
+    {
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Adding zero size entries to LRU queue is not allowed "
+            "(key: {}, offset: {})", it->key, it->offset);
+    }
+#ifndef NDEBUG
+    for (const auto & entry : queue)
+    {
+        /// entry.size == 0 means entry was invalidated.
+        if (entry.size != 0 && entry.key == it->key && entry.offset == it->offset)
+            throw Exception(
+                ErrorCodes::LOGICAL_ERROR,
+                "Attempt to add duplicate queue entry to queue. "
+                "(Key: {}, offset: {}, size: {})",
+                entry.key, entry.offset, entry.size);
+    }
+#endif
+    queue.splice(queue.end(), other.queue, it);
+
+    updateSize(static_cast<int64_t>(size));
+    updateElementsCount(1);
+    other.updateSize(-static_cast<int64_t>(size)); /// Negate as a signed value, not as an unsigned size_t.
+    other.updateElementsCount(-1);
+    /// splice() keeps `it` valid and pointing at the moved entry; queue.end() is past-the-end and must not be dereferenced.
+    return it;
+}
+
+FileSegments LRUFileCachePriority::dump(const CacheGuard::Lock & lock)
+{
+    FileSegments snapshots; /// One snapshot per entry that iterate() visits, in queue order.
+    iterate([&snapshots](LockedKey &, const FileSegmentMetadataPtr & segment_metadata)
+    {
+        snapshots.push_back(FileSegment::getSnapshot(segment_metadata->file_segment));
+        return IterationResult::CONTINUE;
+    }, lock);
+    return snapshots;
+}
+
+void LRUFileCachePriority::LRUFileCacheIterator::remove(const CacheGuard::Lock & lock)
 {
     checkUsable();
-    cache_priority->remove(queue_iter);
+    cache_priority->remove(queue_iter, lock);
     queue_iter = LRUQueueIterator{};
 }
 
@@ -201,11 +291,10 @@ void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size)
     queue_iter->size += size;
 }
 
-size_t LRUFileCachePriority::LRUFileCacheIterator::use(const CacheGuard::Lock &)
+size_t LRUFileCachePriority::LRUFileCacheIterator::increasePriority(const CacheGuard::Lock & lock)
 {
     checkUsable();
-    cache_priority->queue.splice(cache_priority->queue.end(), cache_priority->queue, queue_iter);
-    return ++queue_iter->hits;
+    return cache_priority->increasePriority(queue_iter, lock);
 }
 
 void LRUFileCachePriority::LRUFileCacheIterator::checkUsable() const
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index 89f86961811..8e882fe5d9a 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -6,11 +6,6 @@
 #include <Common/logger_useful.h>
 #include "Interpreters/Cache/Guards.h"
 
-namespace CurrentMetrics
-{
-    extern const Metric FilesystemCacheSizeLimit;
-}
-
 namespace DB
 {
 
@@ -22,12 +17,10 @@ private:
     class LRUFileCacheIterator;
     using LRUQueue = std::list<Entry>;
     using LRUQueueIterator = typename LRUQueue::iterator;
+    friend class SLRUFileCachePriority;
 
 public:
-    LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_)
-    {
-        CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, max_size_);
-    }
+    LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) {}
 
     size_t getSize(const CacheGuard::Lock &) const override { return current_size; }
 
@@ -35,14 +28,20 @@ public:
 
     Iterator add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) override;
 
-    void pop(const CacheGuard::Lock &) override;
-
-    void removeAll(const CacheGuard::Lock &) override;
-
-    void iterate(IterateFunc && func, const CacheGuard::Lock &) override;
+    bool collectCandidatesForEviction(
+        size_t size,
+        FileCacheReserveStat & stat,
+        IFileCachePriority::EvictionCandidates & res,
+        IFileCachePriority::Iterator it,
+        FinalizeEvictionFunc & finalize_eviction_func,
+        const CacheGuard::Lock &) override;
 
     void shuffle(const CacheGuard::Lock &) override;
 
+    FileSegments dump(const CacheGuard::Lock &) override;
+
+    void pop(const CacheGuard::Lock & lock) { remove(queue.begin(), lock); }
+
 private:
     void updateElementsCount(int64_t num);
     void updateSize(int64_t size);
@@ -55,7 +54,19 @@ private:
     /// because of invalidated entries.
     std::atomic<size_t> current_elements_num = 0;
 
-    LRUQueueIterator remove(LRUQueueIterator it);
+    LRUQueueIterator remove(LRUQueueIterator it, const CacheGuard::Lock &);
+
+    enum class IterationResult
+    {
+        BREAK,
+        CONTINUE,
+        REMOVE_AND_CONTINUE,
+    };
+    using IterateFunc = std::function<IterationResult(LockedKey &, const FileSegmentMetadataPtr &)>;
+    void iterate(IterateFunc && func, const CacheGuard::Lock &);
+
+    size_t increasePriority(LRUQueueIterator it, const CacheGuard::Lock &);
+    LRUQueueIterator move(LRUQueueIterator it, LRUFileCachePriority & other, const CacheGuard::Lock &);
 };
 
 class LRUFileCachePriority::LRUFileCacheIterator : public IFileCachePriority::IIterator
@@ -67,9 +78,7 @@ public:
 
     const Entry & getEntry() const override { return *queue_iter; }
 
-    Entry & getEntry() override { return *queue_iter; }
-
-    size_t use(const CacheGuard::Lock &) override;
+    size_t increasePriority(const CacheGuard::Lock &) override;
 
     void remove(const CacheGuard::Lock &) override;
 
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
new file mode 100644
index 00000000000..01a98452e7a
--- /dev/null
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -0,0 +1,252 @@
+#include <Interpreters/Cache/SLRUFileCachePriority.h>
+#include <Interpreters/Cache/FileCache.h>
+#include <Common/CurrentMetrics.h>
+#include <Common/randomSeed.h>
+#include <Common/logger_useful.h>
+#include <Common/assert_cast.h>
+#include <pcg-random/pcg_random.hpp>
+
+namespace CurrentMetrics
+{
+    extern const Metric FilesystemCacheSize;
+    extern const Metric FilesystemCacheElements;
+}
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+namespace
+{
+    size_t getRatio(size_t total, double ratio)
+    {
+        return static_cast<size_t>(total * std::max(0.0, std::min(1.0, ratio))); /// `ratio` is clamped to [0, 1]; the product is truncated.
+    }
+}
+
+/// Splits both limits between the two segments: `size_ratio` of each limit goes to the protected queue and
+/// `1 - size_ratio` to the probationary queue. getRatio() clamps each ratio to [0, 1] and truncates the
+/// share, so the two shares may add up to slightly less than the configured limit.
+SLRUFileCachePriority::SLRUFileCachePriority(size_t max_size_, size_t max_elements_, double size_ratio)
+    : IFileCachePriority(max_size_, max_elements_)
+    , protected_queue(getRatio(max_size_, size_ratio), getRatio(max_elements_, size_ratio))
+    , probationary_queue(getRatio(max_size_, 1 - size_ratio), getRatio(max_elements_, 1 - size_ratio))
+{
+    LOG_DEBUG(
+        log, "Using probationary queue size: {}, protected queue size: {}",
+        probationary_queue.getSizeLimit(), protected_queue.getSizeLimit());
+}
+
+size_t SLRUFileCachePriority::getSize(const CacheGuard::Lock & lock) const
+{
+    return protected_queue.getSize(lock) + probationary_queue.getSize(lock); /// Sum over both segments.
+}
+
+size_t SLRUFileCachePriority::getElementsCount(const CacheGuard::Lock & lock) const
+{
+    return protected_queue.getElementsCount(lock) + probationary_queue.getElementsCount(lock); /// Sum over both segments.
+}
+
+/// New entries always start in the probationary queue.
+/// NOTE(review): this hands out the probationary queue's own iterator, while collectCandidatesForEviction()
+/// assert_casts incoming iterators to SLRUFileCacheIterator — confirm that the iterator types line up.
+IFileCachePriority::Iterator SLRUFileCachePriority::add(
+    KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock & lock)
+{
+    return probationary_queue.add(key_metadata, offset, size, lock);
+}
+
+/// Removes `it` from the segment selected by `is_protected` (true: protected, false: probationary)
+/// and returns whatever that segment's remove() returns.
+SLRUFileCachePriority::SLRUQueueIterator
+SLRUFileCachePriority::remove(SLRUQueueIterator it, bool is_protected, const CacheGuard::Lock & lock)
+{
+    auto & owner = is_protected ? protected_queue : probationary_queue;
+    return owner.remove(it, lock);
+}
+
+/// Forwards the signed size delta to the segment selected by `is_protected`
+/// (true: protected queue, false: probationary queue).
+void SLRUFileCachePriority::updateSize(int64_t size, bool is_protected)
+{
+    auto & owner = is_protected ? protected_queue : probationary_queue;
+    owner.updateSize(size);
+}
+
+/// Forwards the signed element-count delta to the segment selected by `is_protected`
+/// (true: protected queue, false: probationary queue).
+void SLRUFileCachePriority::updateElementsCount(int64_t num, bool is_protected)
+{
+    auto & owner = is_protected ? protected_queue : probationary_queue;
+    owner.updateElementsCount(num);
+}
+
+bool SLRUFileCachePriority::collectCandidatesForEviction(
+    size_t size,
+    FileCacheReserveStat & stat,
+    IFileCachePriority::EvictionCandidates & res,
+    IFileCachePriority::Iterator it,
+    FinalizeEvictionFunc & finalize_eviction_func,
+    const CacheGuard::Lock & lock)
+{
+    bool is_protected = false;
+    if (it)
+        is_protected = assert_cast<SLRUFileCacheIterator *>(it.get())->is_protected;
+    /// Without an iterator, or for a probationary entry, only the probationary queue has to make room.
+    if (!is_protected)
+    {
+        return probationary_queue.collectCandidatesForEviction(size, stat, res, it, finalize_eviction_func, lock);
+    }
+    /// Protected entry: entries pushed out of the protected queue are downgraded to the probationary queue, not evicted.
+    auto downgrade_candidates = std::make_shared<IFileCachePriority::EvictionCandidates>();
+    FileCacheReserveStat downgrade_stat;
+    FinalizeEvictionFunc noop;
+    /// Collect the protected entries that have to leave to fit `size` more bytes.
+    if (!protected_queue.collectCandidatesForEviction(size, downgrade_stat, *downgrade_candidates, it, noop, lock))
+        return false;
+    /// Collect into `res` the probationary entries to evict so that the downgraded bytes fit there.
+    if (!probationary_queue.collectCandidatesForEviction(downgrade_stat.stat.releasable_size, stat, res, it, noop, lock))
+        return false;
+    /// The downgrade itself is deferred to the callback. NOTE(review): it captures a pointer to `lock`, so it must run while that lock is still held — confirm.
+    finalize_eviction_func = [=, lk = &lock, this]() mutable
+    {
+        for (const auto & [key, key_candidates] : *downgrade_candidates)
+        {
+            for (const auto & candidate : key_candidates.candidates)
+            {
+                auto * candidate_it = assert_cast<SLRUFileCacheIterator *>(candidate->getQueueIterator().get());
+                probationary_queue.move(candidate_it->queue_iter, protected_queue, *lk);
+            }
+        }
+    };
+
+    return true;
+}
+
+/// Handles a cache hit on `it`. A protected entry moves to the tail of the protected queue. A probationary entry is
+/// promoted to the protected queue if room can be made (protected entries are downgraded and probationary ones
+/// evicted as needed); otherwise it moves to the tail of the probationary queue. The returned iterator is always
+/// dereferenceable, and `hits` is left to the caller, which increments it exactly once.
+SLRUFileCachePriority::SLRUQueueIterator
+SLRUFileCachePriority::increasePriority(SLRUQueueIterator & it, bool is_protected, const CacheGuard::Lock & lock)
+{
+    /// Moves `it` to the tail of the queue that already owns it, without touching its hit counter.
+    auto bump_in_place = [&it](LRUFileCachePriority & owner)
+    {
+        owner.queue.splice(owner.queue.end(), owner.queue, it);
+        return it;
+    };
+    if (is_protected)
+        return bump_in_place(protected_queue);
+
+    /// Too large for the protected queue: keep it probationary rather than return an iterator that cannot be dereferenced.
+    if (it->size > protected_queue.getSizeLimit())
+        return bump_in_place(probationary_queue);
+
+    IFileCachePriority::EvictionCandidates downgrade_candidates;
+    FileCacheReserveStat downgrade_stat;
+    FinalizeEvictionFunc noop;
+    if (!protected_queue.collectCandidatesForEviction(it->size, downgrade_stat, downgrade_candidates, {}, noop, lock))
+        return bump_in_place(probationary_queue);
+
+    IFileCachePriority::EvictionCandidates eviction_candidates;
+    FileCacheReserveStat stat;
+    if (it->size < downgrade_stat.stat.releasable_size
+        && !probationary_queue.collectCandidatesForEviction(
+            downgrade_stat.stat.releasable_size - it->size, stat, eviction_candidates, {}, noop, lock))
+    {
+        return bump_in_place(probationary_queue);
+    }
+
+    eviction_candidates.evict(lock);
+
+    for (const auto & [key, key_candidates] : downgrade_candidates)
+    {
+        for (const auto & candidate : key_candidates.candidates)
+        {
+            auto * candidate_it = assert_cast<SLRUFileCacheIterator *>(candidate->getQueueIterator().get());
+            probationary_queue.move(candidate_it->queue_iter, protected_queue, lock);
+        }
+    }
+
+    /// splice() inside move() keeps `it` valid and pointing at the moved entry, so return it, not move()'s result.
+    protected_queue.move(it, probationary_queue, lock);
+    return it;
+}
+
+FileSegments SLRUFileCachePriority::dump(const CacheGuard::Lock & lock)
+{
+    auto segments = probationary_queue.dump(lock); /// Probationary segments first, protected ones appended after them.
+    const auto protected_segments = protected_queue.dump(lock);
+    segments.insert(segments.end(), protected_segments.begin(), protected_segments.end());
+    return segments;
+}
+
+/// `queue_iter_` points at the entry inside one of `cache_priority_`'s two queues;
+/// `is_protected_` tells which one (true: protected, false: probationary).
+SLRUFileCachePriority::SLRUFileCacheIterator::SLRUFileCacheIterator(
+    SLRUFileCachePriority * cache_priority_,
+    SLRUFileCachePriority::SLRUQueueIterator queue_iter_,
+    bool is_protected_)
+    : cache_priority(cache_priority_), queue_iter(queue_iter_), is_protected(is_protected_)
+{
+}
+
+void SLRUFileCachePriority::SLRUFileCacheIterator::remove(const CacheGuard::Lock & lock)
+{
+    checkUsable();
+    cache_priority->remove(queue_iter, is_protected, lock); /// Removes the entry from the queue selected by is_protected.
+    queue_iter = SLRUQueueIterator{}; /// Mark this iterator unusable: checkUsable() throws for a default-constructed one.
+}
+
+/// Takes the entry out of the size/element accounting and zeroes its size; the list node itself is not erased here.
+void SLRUFileCachePriority::SLRUFileCacheIterator::invalidate()
+{
+    checkUsable();
+    LOG_TEST(
+        cache_priority->log,
+        "Invalidating entry in SLRU queue. Key: {}, offset: {}, previous size: {}",
+        queue_iter->key, queue_iter->offset, queue_iter->size);
+    /// A zero size is what marks an entry as invalidated.
+    cache_priority->updateSize(-queue_iter->size, is_protected);
+    cache_priority->updateElementsCount(-1, is_protected);
+    queue_iter->size = 0;
+}
+
+/// Applies the signed delta `size` to both the owning queue's accounting and the entry's own size.
+void SLRUFileCachePriority::SLRUFileCacheIterator::updateSize(int64_t size)
+{
+    checkUsable();
+    LOG_TEST(
+        cache_priority->log,
+        "Update size with {} in SLRU queue for key: {}, offset: {}, previous size: {}",
+        size, queue_iter->key, queue_iter->offset, queue_iter->size);
+
+    cache_priority->updateSize(size, is_protected);
+    queue_iter->size += size;
+}
+
+size_t SLRUFileCachePriority::SLRUFileCacheIterator::increasePriority(const CacheGuard::Lock & lock)
+{
+    checkUsable();
+    queue_iter = cache_priority->increasePriority(queue_iter, is_protected, lock); /// NOTE(review): is_protected is not refreshed here even if the entry was promoted — confirm.
+    return ++queue_iter->hits; /// Returns the hit count after this access.
+}
+
+void SLRUFileCachePriority::SLRUFileCacheIterator::checkUsable() const
+{
+    if (queue_iter == SLRUQueueIterator{}) /// remove() resets queue_iter to a default-constructed iterator.
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator");
+}
+
+void SLRUFileCachePriority::shuffle(const CacheGuard::Lock & lock)
+{
+    protected_queue.shuffle(lock); /// Each segment is shuffled on its own; entries never cross between segments here.
+    probationary_queue.shuffle(lock);
+}
+
+}
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
new file mode 100644
index 00000000000..cc9ab70e4a3
--- /dev/null
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -0,0 +1,88 @@
+#pragma once
+
+#include <list>
+#include <Interpreters/Cache/IFileCachePriority.h>
+#include <Interpreters/Cache/FileCacheKey.h>
+#include <Interpreters/Cache/LRUFileCachePriority.h>
+#include <Common/logger_useful.h>
+#include <Interpreters/Cache/Guards.h>
+
+namespace CurrentMetrics
+{
+    extern const Metric FilesystemCacheSizeLimit;
+}
+
+namespace DB
+{
+
+/// Based on the SLRU algorithm implementation, the record with the lowest priority is stored at
+/// the head of the queue, and the record with the highest priority is stored at the tail.
+class SLRUFileCachePriority : public IFileCachePriority
+{
+private:
+    class SLRUFileCacheIterator;
+    using LRUQueue = std::list<Entry>;
+    using SLRUQueueIterator = typename LRUQueue::iterator;
+
+public:
+    SLRUFileCachePriority(size_t max_size_, size_t max_elements_, double size_ratio);
+
+    size_t getSize(const CacheGuard::Lock & lock) const override;
+
+    size_t getElementsCount(const CacheGuard::Lock &) const override;
+
+    Iterator add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) override;
+
+    bool collectCandidatesForEviction(
+        size_t size,
+        FileCacheReserveStat & stat,
+        IFileCachePriority::EvictionCandidates & res,
+        IFileCachePriority::Iterator,
+        FinalizeEvictionFunc & finalize_eviction_func,
+        const CacheGuard::Lock &) override;
+
+    void shuffle(const CacheGuard::Lock &) override;
+
+    FileSegments dump(const CacheGuard::Lock &) override;
+
+private:
+    void updateElementsCount(int64_t num, bool is_protected);
+    void updateSize(int64_t size, bool is_protected);
+
+    LRUFileCachePriority protected_queue;
+    LRUFileCachePriority probationary_queue;
+
+    Poco::Logger * log = &Poco::Logger::get("SLRUFileCachePriority");
+
+    SLRUQueueIterator remove(SLRUQueueIterator it, bool is_protected, const CacheGuard::Lock & lock);
+    SLRUQueueIterator increasePriority(SLRUQueueIterator & it, bool is_protected, const CacheGuard::Lock & lock);
+};
+
+class SLRUFileCachePriority::SLRUFileCacheIterator : public IFileCachePriority::IIterator
+{
+    friend class SLRUFileCachePriority;
+public:
+    SLRUFileCacheIterator(
+        SLRUFileCachePriority * cache_priority_,
+        SLRUFileCachePriority::SLRUQueueIterator queue_iter_,
+        bool is_protected_);
+
+    const Entry & getEntry() const override { return *queue_iter; }
+
+    size_t increasePriority(const CacheGuard::Lock &) override;
+
+    void remove(const CacheGuard::Lock &) override;
+
+    void invalidate() override;
+
+    void updateSize(int64_t size) override;
+
+private:
+    void checkUsable() const;
+
+    SLRUFileCachePriority * cache_priority;
+    mutable SLRUFileCachePriority::SLRUQueueIterator queue_iter;
+    const bool is_protected;
+};
+
+}

From e3e7e6c879d9a7f243693ebfa9f22c02d4e7b201 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Wed, 22 Nov 2023 15:53:19 +0100
Subject: [PATCH 021/213] Better, add comments

---
 src/Interpreters/Cache/EvictionCandidates.cpp | 71 +++++++++++++++
 src/Interpreters/Cache/EvictionCandidates.h   | 35 ++++++++
 src/Interpreters/Cache/FileCache.cpp          |  7 +-
 src/Interpreters/Cache/FileCache.h            |  8 +-
 src/Interpreters/Cache/FileCacheSettings.cpp  | 18 ++--
 src/Interpreters/Cache/FileCacheSettings.h    |  3 +-
 src/Interpreters/Cache/IFileCachePriority.cpp | 64 --------------
 src/Interpreters/Cache/IFileCachePriority.h   | 28 +-----
 .../Cache/LRUFileCachePriority.cpp            | 70 +++++++++------
 src/Interpreters/Cache/LRUFileCachePriority.h |  5 +-
 src/Interpreters/Cache/QueryLimit.h           |  4 +-
 .../Cache/SLRUFileCachePriority.cpp           | 87 +++++++++++++++----
 .../Cache/SLRUFileCachePriority.h             |  2 +-
 13 files changed, 248 insertions(+), 154 deletions(-)
 create mode 100644 src/Interpreters/Cache/EvictionCandidates.cpp
 create mode 100644 src/Interpreters/Cache/EvictionCandidates.h

diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp
new file mode 100644
index 00000000000..676a02a35da
--- /dev/null
+++ b/src/Interpreters/Cache/EvictionCandidates.cpp
@@ -0,0 +1,71 @@
+#include <Interpreters/Cache/EvictionCandidates.h>
+#include <Interpreters/Cache/Metadata.h>
+
+
+namespace ProfileEvents
+{
+    extern const Event FilesystemCacheEvictMicroseconds;
+    extern const Event FilesystemCacheEvictedBytes;
+    extern const Event FilesystemCacheEvictedFileSegments;
+}
+
+namespace DB
+{
+
+EvictionCandidates::~EvictionCandidates()
+{
+    for (const auto & [key, key_candidates] : candidates)
+    {
+        for (const auto & candidate : key_candidates.candidates)
+            candidate->removal_candidate = false;
+    }
+}
+
+void EvictionCandidates::add(LockedKey & locked_key, const FileSegmentMetadataPtr & candidate)
+{
+    auto it = candidates.emplace(locked_key.getKey(), KeyCandidates{}).first;
+    it->second.key_metadata = locked_key.getKeyMetadata();
+    it->second.candidates.push_back(candidate);
+
+    candidate->removal_candidate = true;
+    ++candidates_size;
+}
+
+void EvictionCandidates::evict(FileCacheQueryLimit::QueryContext * query_context, const CacheGuard::Lock & lock)
+{
+    if (candidates.empty())
+        return;
+
+    auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds);
+
+    for (auto & [key, key_candidates] : candidates)
+    {
+        auto locked_key = key_candidates.key_metadata->tryLock();
+        if (!locked_key)
+            continue; /// key could become invalid after we released the key lock above, just skip it.
+
+        auto & to_evict = key_candidates.candidates;
+        while (!to_evict.empty())
+        {
+            auto & candidate = to_evict.back();
+            chassert(candidate->releasable());
+
+            const auto segment = candidate->file_segment;
+            auto queue_it = segment->getQueueIterator();
+            chassert(queue_it);
+
+            ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments);
+            ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size());
+
+            locked_key->removeFileSegment(segment->offset(), segment->lock());
+            queue_it->remove(lock);
+
+            if (query_context)
+                query_context->remove(segment->key(), segment->offset(), lock);
+
+            to_evict.pop_back();
+        }
+    }
+}
+
+}
diff --git a/src/Interpreters/Cache/EvictionCandidates.h b/src/Interpreters/Cache/EvictionCandidates.h
new file mode 100644
index 00000000000..0557962d97f
--- /dev/null
+++ b/src/Interpreters/Cache/EvictionCandidates.h
@@ -0,0 +1,35 @@
+#pragma once
+#include <Interpreters/Cache/QueryLimit.h>
+
+namespace DB
+{
+
+class EvictionCandidates
+{
+public:
+    ~EvictionCandidates();
+
+    void add(LockedKey & locked_key, const FileSegmentMetadataPtr & candidate);
+
+    void evict(FileCacheQueryLimit::QueryContext * query_context, const CacheGuard::Lock &);
+
+    size_t size() const { return candidates_size; }
+
+    auto begin() const { return candidates.begin(); }
+
+    auto end() const { return candidates.end(); }
+
+private:
+    struct KeyCandidates
+    {
+        KeyMetadataPtr key_metadata;
+        std::vector<FileSegmentMetadataPtr> candidates;
+    };
+
+    std::unordered_map<FileCacheKey, KeyCandidates> candidates;
+    size_t candidates_size = 0;
+};
+
+using EvictionCandidatesPtr = std::unique_ptr<EvictionCandidates>;
+
+}
diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index 8f75fb6f0b9..4bfd60cac03 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -8,6 +8,7 @@
 #include <Interpreters/Cache/FileCacheSettings.h>
 #include <Interpreters/Cache/LRUFileCachePriority.h>
 #include <Interpreters/Cache/SLRUFileCachePriority.h>
+#include <Interpreters/Cache/EvictionCandidates.h>
 #include <Interpreters/Context.h>
 #include <base/hex.h>
 #include <Common/ThreadPool.h>
@@ -578,7 +579,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa
             file_segment.key(), file_segment.offset());
     }
 
-    IFileCachePriority::EvictionCandidates eviction_candidates;
+    EvictionCandidates eviction_candidates;
     IFileCachePriority::FinalizeEvictionFunc finalize_eviction_func;
 
     if (query_priority)
@@ -603,10 +604,10 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa
     if (!file_segment.getKeyMetadata()->createBaseDirectory())
         return false;
 
-    eviction_candidates.evict(cache_lock);
+    eviction_candidates.evict(query_context.get(), cache_lock);
 
     if (finalize_eviction_func)
-        finalize_eviction_func();
+        finalize_eviction_func(cache_lock);
 
     if (queue_iterator)
     {
diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h
index 7c65c112869..d5e18486f33 100644
--- a/src/Interpreters/Cache/FileCache.h
+++ b/src/Interpreters/Cache/FileCache.h
@@ -33,11 +33,11 @@ struct FileCacheReserveStat
 {
     struct Stat
     {
-        size_t releasable_size;
-        size_t releasable_count;
+        size_t releasable_size = 0;
+        size_t releasable_count = 0;
 
-        size_t non_releasable_size;
-        size_t non_releasable_count;
+        size_t non_releasable_size = 0;
+        size_t non_releasable_count = 0;
     };
 
     Stat stat;
diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp
index 4fe29e241a4..0c86fb82f0d 100644
--- a/src/Interpreters/Cache/FileCacheSettings.cpp
+++ b/src/Interpreters/Cache/FileCacheSettings.cpp
@@ -13,7 +13,7 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }
 
-void FileCacheSettings::loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetString get_string)
+void FileCacheSettings::loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetString get_string, FuncGetDouble get_double)
 {
     auto config_parse_size = [&](std::string_view key) { return parseWithSizeSuffix<uint64_t>(get_string(key)); };
 
@@ -62,8 +62,8 @@ void FileCacheSettings::loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetStrin
     if (has("cache_policy"))
         cache_policy = get_string("cache_policy");
 
-    // if (has("slru_size_ratio"))
-    //     slru_size_ratio = get_double("slru_size_ratio");
+    if (has("slru_size_ratio"))
+        slru_size_ratio = get_double("slru_size_ratio");
 }
 
 void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
@@ -71,15 +71,17 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration &
     auto config_has = [&](std::string_view key) { return config.has(fmt::format("{}.{}", config_prefix, key)); };
     auto config_get_uint = [&](std::string_view key) { return config.getUInt(fmt::format("{}.{}", config_prefix, key)); };
     auto config_get_string = [&](std::string_view key) { return config.getString(fmt::format("{}.{}", config_prefix, key)); };
-    loadImpl(std::move(config_has), std::move(config_get_uint), std::move(config_get_string));
+    auto config_get_double = [&](std::string_view key) { return config.getDouble(fmt::format("{}.{}", config_prefix, key)); };
+    loadImpl(std::move(config_has), std::move(config_get_uint), std::move(config_get_string), std::move(config_get_double));
 }
 
 void FileCacheSettings::loadFromCollection(const NamedCollection & collection)
 {
-    auto config_has = [&](std::string_view key) { return collection.has(std::string(key)); };
-    auto config_get_uint = [&](std::string_view key) { return collection.get<UInt64>(std::string(key)); };
-    auto config_get_string = [&](std::string_view key) { return collection.get<String>(std::string(key)); };
-    loadImpl(std::move(config_has), std::move(config_get_uint), std::move(config_get_string));
+    auto collection_has = [&](std::string_view key) { return collection.has(std::string(key)); };
+    auto collection_get_uint = [&](std::string_view key) { return collection.get<UInt64>(std::string(key)); };
+    auto collection_get_string = [&](std::string_view key) { return collection.get<String>(std::string(key)); };
+    auto collection_get_double = [&](std::string_view key) { return collection.get<Float64>(std::string(key)); };
+    loadImpl(std::move(collection_has), std::move(collection_get_uint), std::move(collection_get_string), std::move(collection_get_double));
 }
 
 }
diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h
index bb9c4d17435..7b83639f444 100644
--- a/src/Interpreters/Cache/FileCacheSettings.h
+++ b/src/Interpreters/Cache/FileCacheSettings.h
@@ -41,7 +41,8 @@ private:
     using FuncHas = std::function<bool(std::string_view)>;
     using FuncGetUInt = std::function<size_t(std::string_view)>;
     using FuncGetString = std::function<std::string(std::string_view)>;
-    void loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetString get_string);
+    using FuncGetDouble = std::function<double(std::string_view)>;
+    void loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetString get_string, FuncGetDouble get_double);
 };
 
 }
diff --git a/src/Interpreters/Cache/IFileCachePriority.cpp b/src/Interpreters/Cache/IFileCachePriority.cpp
index 342a9589a1d..9109e76562f 100644
--- a/src/Interpreters/Cache/IFileCachePriority.cpp
+++ b/src/Interpreters/Cache/IFileCachePriority.cpp
@@ -1,5 +1,4 @@
 #include <Interpreters/Cache/IFileCachePriority.h>
-#include <Interpreters/Cache/Metadata.h>
 #include <Common/CurrentMetrics.h>
 
 
@@ -8,13 +7,6 @@ namespace CurrentMetrics
     extern const Metric FilesystemCacheSizeLimit;
 }
 
-namespace ProfileEvents
-{
-    extern const Event FilesystemCacheEvictMicroseconds;
-    extern const Event FilesystemCacheEvictedBytes;
-    extern const Event FilesystemCacheEvictedFileSegments;
-}
-
 namespace DB
 {
 
@@ -45,60 +37,4 @@ IFileCachePriority::Entry::Entry(const Entry & other)
 {
 }
 
-IFileCachePriority::EvictionCandidates::~EvictionCandidates()
-{
-    /// If failed to reserve space, we don't delete the candidates but drop the flag instead
-    /// so the segments can be used again
-    for (const auto & [key, key_candidates] : candidates)
-    {
-        for (const auto & candidate : key_candidates.candidates)
-            candidate->removal_candidate = false;
-    }
-}
-
-void IFileCachePriority::EvictionCandidates::add(const KeyMetadataPtr & key, const FileSegmentMetadataPtr & candidate)
-{
-    auto it = candidates.emplace(key->key, KeyCandidates{}).first;
-    it->second.candidates.push_back(candidate);
-    candidate->removal_candidate = true;
-}
-
-void IFileCachePriority::EvictionCandidates::evict(const CacheGuard::Lock & lock)
-{
-    if (candidates.empty())
-        return;
-
-    auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds);
-
-    for (auto & [key, key_candidates] : candidates)
-    {
-        auto locked_key = key_candidates.key_metadata->tryLock();
-        if (!locked_key)
-            continue; /// key could become invalid after we released the key lock above, just skip it.
-
-        /// delete from vector in reverse order just for efficiency
-        auto & to_evict = key_candidates.candidates;
-        while (!to_evict.empty())
-        {
-            auto & candidate = to_evict.back();
-            chassert(candidate->releasable());
-
-            const auto * segment = candidate->file_segment.get();
-            auto queue_it = segment->getQueueIterator();
-            chassert(queue_it);
-
-            ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments);
-            ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size());
-
-            locked_key->removeFileSegment(segment->offset(), segment->lock());
-            queue_it->remove(lock);
-
-            // if (query_context)
-            //     query_context->remove(current_key, segment->offset(), cache_lock);
-
-            to_evict.pop_back();
-        }
-    }
-}
-
 }
diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h
index da09e927840..1703ed09139 100644
--- a/src/Interpreters/Cache/IFileCachePriority.h
+++ b/src/Interpreters/Cache/IFileCachePriority.h
@@ -10,6 +10,7 @@
 namespace DB
 {
 struct FileCacheReserveStat;
+class EvictionCandidates;
 
 /// IFileCachePriority is used to maintain the priority of cached data.
 class IFileCachePriority : private boost::noncopyable
@@ -66,35 +67,12 @@ public:
 
     virtual FileSegments dump(const CacheGuard::Lock &) = 0;
 
-    class EvictionCandidates
-    {
-    public:
-        ~EvictionCandidates();
-
-        void add(const KeyMetadataPtr & key, const FileSegmentMetadataPtr & candidate);
-
-        void evict(const CacheGuard::Lock &);
-
-        auto begin() const { return candidates.begin(); }
-        auto end() const { return candidates.end(); }
-
-    private:
-        struct KeyCandidates
-        {
-            KeyMetadataPtr key_metadata;
-            std::vector<FileSegmentMetadataPtr> candidates;
-        };
-
-        std::unordered_map<Key, KeyCandidates> candidates;
-    };
-
-    using EvictionCandidatesPtr = std::unique_ptr<EvictionCandidates>;
-    using FinalizeEvictionFunc = std::function<void()>;
+    using FinalizeEvictionFunc = std::function<void(const CacheGuard::Lock & lk)>;
 
     virtual bool collectCandidatesForEviction(
         size_t size,
         FileCacheReserveStat & stat,
-        IFileCachePriority::EvictionCandidates & res,
+        EvictionCandidates & res,
         IFileCachePriority::Iterator it,
         FinalizeEvictionFunc & finalize_eviction_func,
         const CacheGuard::Lock &) = 0;
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index addbb55e22d..c42e44f21b1 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -1,5 +1,6 @@
 #include <Interpreters/Cache/LRUFileCachePriority.h>
 #include <Interpreters/Cache/FileCache.h>
+#include <Interpreters/Cache/EvictionCandidates.h>
 #include <Common/CurrentMetrics.h>
 #include <Common/randomSeed.h>
 #include <Common/logger_useful.h>
@@ -29,22 +30,27 @@ IFileCachePriority::Iterator LRUFileCachePriority::add(
     KeyMetadataPtr key_metadata,
     size_t offset,
     size_t size,
-    const CacheGuard::Lock &)
+    const CacheGuard::Lock & lock)
 {
-    const auto & key = key_metadata->key;
-    if (size == 0)
+    auto it = add(Entry(key_metadata->key, offset, size, key_metadata), lock);
+    return std::make_shared<LRUFileCacheIterator>(this, it);
+}
+
+LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::add(Entry && entry, const CacheGuard::Lock &)
+{
+    if (entry.size == 0)
     {
         throw Exception(
             ErrorCodes::LOGICAL_ERROR,
             "Adding zero size entries to LRU queue is not allowed "
-            "(key: {}, offset: {})", key, offset);
+            "(key: {}, offset: {})", entry.key, entry.offset);
     }
 
 #ifndef NDEBUG
-    for (const auto & entry : queue)
+    for (const auto & queue_entry : queue)
     {
         /// entry.size == 0 means entry was invalidated.
-        if (entry.size != 0 && entry.key == key && entry.offset == offset)
+        if (queue_entry.size != 0 && queue_entry.key == entry.key && queue_entry.offset == entry.offset)
             throw Exception(
                 ErrorCodes::LOGICAL_ERROR,
                 "Attempt to add duplicate queue entry to queue. "
@@ -54,24 +60,24 @@ IFileCachePriority::Iterator LRUFileCachePriority::add(
 #endif
 
     const auto & size_limit = getSizeLimit();
-    if (size_limit && current_size + size > size_limit)
+    if (size_limit && current_size + entry.size > size_limit)
     {
         throw Exception(
             ErrorCodes::LOGICAL_ERROR,
             "Not enough space to add {}:{} with size {}: current size: {}/{}",
-            key, offset, size, current_size, size_limit);
+            entry.key, entry.offset, entry.size, current_size, size_limit);
     }
 
-    auto iter = queue.insert(queue.end(), Entry(key, offset, size, key_metadata));
+    auto it = queue.insert(queue.end(), entry);
 
-    updateSize(size);
+    updateSize(entry.size);
     updateElementsCount(1);
 
     LOG_TEST(
         log, "Added entry into LRU queue, key: {}, offset: {}, size: {}",
-        key, offset, size);
+        entry.key, entry.offset, entry.size);
 
-    return std::make_shared<LRUFileCacheIterator>(this, iter);
+    return it;
 }
 
 LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::remove(LRUQueueIterator it, const CacheGuard::Lock &)
@@ -159,22 +165,27 @@ void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock &
     }
 }
 
+bool LRUFileCachePriority::canFit(size_t size, const CacheGuard::Lock & lock) const
+{
+    return canFit(size, 0, 0, lock);
+}
+
+bool LRUFileCachePriority::canFit(size_t size, size_t released_size_assumption, size_t released_elements_assumption, const CacheGuard::Lock &) const
+{
+    return (max_size == 0 || (current_size + size - released_size_assumption <= max_size))
+        && (max_elements == 0 || current_elements_num + 1 - released_elements_assumption <= max_elements);
+}
+
 bool LRUFileCachePriority::collectCandidatesForEviction(
     size_t size,
     FileCacheReserveStat & stat,
-    IFileCachePriority::EvictionCandidates & res,
+    EvictionCandidates & res,
     IFileCachePriority::Iterator,
     FinalizeEvictionFunc &,
     const CacheGuard::Lock & lock)
 {
-    auto is_overflow = [&]
-    {
-        return (max_size != 0 && (current_size + size - stat.stat.releasable_size > max_size))
-            || (max_elements != 0 && stat.stat.releasable_count == 0 && current_elements_num == max_elements);
-    };
-
-    if (!is_overflow())
-        return false;
+    if (canFit(size, lock))
+        return true;
 
     ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionTries);
 
@@ -185,7 +196,7 @@ bool LRUFileCachePriority::collectCandidatesForEviction(
 
         if (segment_metadata->releasable())
         {
-            res.add(locked_key.getKeyMetadata(), segment_metadata);
+            res.add(locked_key, segment_metadata);
             stat.update(segment_metadata->size(), file_segment->getKind(), true);
         }
         else
@@ -197,12 +208,17 @@ bool LRUFileCachePriority::collectCandidatesForEviction(
         return IterationResult::CONTINUE;
     };
 
-    iterate(
-        [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
-        { return is_overflow() ? iterate_func(locked_key, segment_metadata) : IterationResult::BREAK; },
-        lock);
+    auto can_fit = [&]
+    {
+        return canFit(size, stat.stat.releasable_size, stat.stat.releasable_count, lock);
+    };
 
-    return is_overflow();
+    iterate([&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
+    {
+        return can_fit() ? IterationResult::BREAK : iterate_func(locked_key, segment_metadata);
+    }, lock);
+
+    return can_fit();
 }
 
 size_t LRUFileCachePriority::increasePriority(LRUQueueIterator it, const CacheGuard::Lock &)
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index 8e882fe5d9a..99011aca06c 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -31,7 +31,7 @@ public:
     bool collectCandidatesForEviction(
         size_t size,
         FileCacheReserveStat & stat,
-        IFileCachePriority::EvictionCandidates & res,
+        EvictionCandidates & res,
         IFileCachePriority::Iterator it,
         FinalizeEvictionFunc & finalize_eviction_func,
         const CacheGuard::Lock &) override;
@@ -67,6 +67,9 @@ private:
 
     size_t increasePriority(LRUQueueIterator it, const CacheGuard::Lock &);
     LRUQueueIterator move(LRUQueueIterator it, LRUFileCachePriority & other, const CacheGuard::Lock &);
+    LRUQueueIterator add(Entry && entry, const CacheGuard::Lock &);
+    bool canFit(size_t size, const CacheGuard::Lock &) const;
+    bool canFit(size_t size, size_t released_size_assumption, size_t released_elements_assumption, const CacheGuard::Lock &) const;
 };
 
 class LRUFileCachePriority::LRUFileCacheIterator : public IFileCachePriority::IIterator
diff --git a/src/Interpreters/Cache/QueryLimit.h b/src/Interpreters/Cache/QueryLimit.h
index f8247e8c520..c252cd2dccc 100644
--- a/src/Interpreters/Cache/QueryLimit.h
+++ b/src/Interpreters/Cache/QueryLimit.h
@@ -36,7 +36,7 @@ public:
 
         bool recacheOnFileCacheQueryLimitExceeded() const { return recache_on_query_limit_exceeded; }
 
-        IFileCachePriority::Iterator tryGet(
+        Priority::Iterator tryGet(
             const Key & key,
             size_t offset,
             const CacheGuard::Lock &);
@@ -53,7 +53,7 @@ public:
             const CacheGuard::Lock &);
 
     private:
-        using Records = std::unordered_map<FileCacheKeyAndOffset, IFileCachePriority::Iterator, FileCacheKeyAndOffsetHash>;
+        using Records = std::unordered_map<FileCacheKeyAndOffset, Priority::Iterator, FileCacheKeyAndOffsetHash>;
         Records records;
         LRUFileCachePriority priority;
         const bool recache_on_query_limit_exceeded;
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index 01a98452e7a..12119c23ce6 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -1,5 +1,6 @@
 #include <Interpreters/Cache/SLRUFileCachePriority.h>
 #include <Interpreters/Cache/FileCache.h>
+#include <Interpreters/Cache/EvictionCandidates.h>
 #include <Common/CurrentMetrics.h>
 #include <Common/randomSeed.h>
 #include <Common/logger_useful.h>
@@ -88,38 +89,58 @@ void SLRUFileCachePriority::updateElementsCount(int64_t num, bool is_protected)
 bool SLRUFileCachePriority::collectCandidatesForEviction(
     size_t size,
     FileCacheReserveStat & stat,
-    IFileCachePriority::EvictionCandidates & res,
+    EvictionCandidates & res,
     IFileCachePriority::Iterator it,
     FinalizeEvictionFunc & finalize_eviction_func,
     const CacheGuard::Lock & lock)
 {
-    bool is_protected = false;
-    if (it)
-        is_protected = assert_cast<SLRUFileCacheIterator *>(it.get())->is_protected;
-
-    if (!is_protected)
+    /// `it` is a pointer to entry we want to evict in favour of.
+    /// If `it` is nullptr, then it is the first space reservation attempt
+    /// for a corresponding file segment, so it will be directly put into probationary queue.
+    if (!it)
     {
         return probationary_queue.collectCandidatesForEviction(size, stat, res, it, finalize_eviction_func, lock);
     }
 
-    auto downgrade_candidates = std::make_shared<IFileCachePriority::EvictionCandidates>();
+    /// If `it` is not nullptr (i.e. the entry is already in some queue),
+    /// we need to check in which queue (protected/probationary) it currently is
+    /// (in order to know where we need to free space).
+    if (!assert_cast<SLRUFileCacheIterator *>(it.get())->is_protected)
+    {
+        return probationary_queue.collectCandidatesForEviction(size, stat, res, it, finalize_eviction_func, lock);
+    }
+
+    /// Entry is in protected queue.
+    /// Check if we have enough space in protected queue to fit a new size of entry.
+    /// `size` is the increment to the current entry.size we want to increase.
+    if (protected_queue.canFit(size, lock))
+        return true;
+
+    /// If not enough space - we need to "downgrade" lowest priority entries from protected
+    /// queue to probationary queue.
+    /// The amount of such "downgraded" entries is equal to the amount
+    /// required to make space for additional `size` bytes for entry.
+    auto downgrade_candidates = std::make_shared<EvictionCandidates>();
     FileCacheReserveStat downgrade_stat;
     FinalizeEvictionFunc noop;
 
     if (!protected_queue.collectCandidatesForEviction(size, downgrade_stat, *downgrade_candidates, it, noop, lock))
         return false;
 
-    if (!probationary_queue.collectCandidatesForEviction(downgrade_stat.stat.releasable_size, stat, res, it, noop, lock))
+    const size_t size_to_downgrade = downgrade_stat.stat.releasable_size;
+
+    if (!probationary_queue.canFit(size_to_downgrade, lock)
+        && !probationary_queue.collectCandidatesForEviction(size_to_downgrade, stat, res, it, noop, lock))
         return false;
 
-    finalize_eviction_func = [=, lk = &lock, this]() mutable
+    finalize_eviction_func = [=, this](const CacheGuard::Lock & lk) mutable
     {
         for (const auto & [key, key_candidates] : *downgrade_candidates)
         {
             for (const auto & candidate : key_candidates.candidates)
             {
                 auto * candidate_it = assert_cast<SLRUFileCacheIterator *>(candidate->getQueueIterator().get());
-                probationary_queue.move(candidate_it->queue_iter, protected_queue, *lk);
+                probationary_queue.move(candidate_it->queue_iter, protected_queue, lk);
             }
         }
     };
@@ -130,41 +151,71 @@ bool SLRUFileCachePriority::collectCandidatesForEviction(
 SLRUFileCachePriority::SLRUQueueIterator
 SLRUFileCachePriority::increasePriority(SLRUQueueIterator & it, bool is_protected, const CacheGuard::Lock & lock)
 {
+    /// If entry (`it` is the pointer to the entry) is already in protected queue,
+    /// we only need to increase its priority within the protected queue.
     if (is_protected)
     {
         protected_queue.increasePriority(it, lock);
         return it;
     }
 
+    /// Entry is in probationary queue.
+    /// We need to move it to protected queue.
+
     if (it->size > protected_queue.getSizeLimit())
     {
+        /// Entry size is bigger than the whole protected queue limit.
         /// This is only possible if protected_queue_size_limit is less than max_file_segment_size,
         /// which is not possible in any realistic cache configuration.
-        return {};
+        probationary_queue.increasePriority(it, lock);
+        return it;
     }
 
-    IFileCachePriority::EvictionCandidates downgrade_candidates;
+    /// Check if there is enough space in protected queue to move entry there.
+    /// If not - we need to "downgrade" lowest priority entries from protected
+    /// queue to probationary queue.
+    /// The amount of such "downgraded" entries is equal to the amount
+    /// required to make space for entry we want to insert.
+    EvictionCandidates downgrade_candidates;
     FileCacheReserveStat downgrade_stat;
     FinalizeEvictionFunc noop;
 
     if (!protected_queue.collectCandidatesForEviction(it->size, downgrade_stat, downgrade_candidates, {}, noop, lock))
     {
+        /// We cannot make space for entry to be moved to protected queue
+        /// (not enough releasable file segments).
+        /// Then just increase its priority within probationary queue.
         probationary_queue.increasePriority(it, lock);
         return it;
     }
 
-    IFileCachePriority::EvictionCandidates eviction_candidates;
+    /// Now we need to check if those "downgrade" candidates (their total size in
+    /// bytes, not their count) can actually be moved to probationary queue.
+    const size_t size_to_downgrade = downgrade_stat.stat.releasable_size;
+    size_t size_to_free = 0;
+    if (size_to_downgrade && size_to_downgrade > it->size)
+        size_to_free = size_to_downgrade - it->size;
+
+    EvictionCandidates eviction_candidates;
     FileCacheReserveStat stat;
 
-    if (it->size < downgrade_stat.stat.releasable_size
-        && !probationary_queue.collectCandidatesForEviction(
-            downgrade_stat.stat.releasable_size - it->size, stat, eviction_candidates, {}, noop, lock))
+    if (size_to_free
+        && !probationary_queue.collectCandidatesForEviction(size_to_free, stat, eviction_candidates, {}, noop, lock))
     {
+        /// "downgrade" candidates cannot be moved to probationary queue,
+        /// so entry cannot be moved to protected queue as well.
+        /// Then just increase its priority within probationary queue.
         probationary_queue.increasePriority(it, lock);
         return it;
     }
 
-    eviction_candidates.evict(lock);
+    /// Make space for "downgrade" candidates.
+    eviction_candidates.evict(nullptr, lock);
+
+    /// All checks passed, now we can move downgrade candidates to
+    /// probationary queue and our entry to protected queue.
+    Entry entry = *it;
+    probationary_queue.remove(it, lock);
 
     for (const auto & [key, key_candidates] : downgrade_candidates)
     {
@@ -175,7 +226,7 @@ SLRUFileCachePriority::increasePriority(SLRUQueueIterator & it, bool is_protecte
         }
     }
 
-    return protected_queue.move(it, probationary_queue, lock);
+    return protected_queue.add(std::move(entry), lock);
 }
 
 FileSegments SLRUFileCachePriority::dump(const CacheGuard::Lock & lock)
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
index cc9ab70e4a3..a179a5285b2 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.h
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -36,7 +36,7 @@ public:
     bool collectCandidatesForEviction(
         size_t size,
         FileCacheReserveStat & stat,
-        IFileCachePriority::EvictionCandidates & res,
+        EvictionCandidates & res,
         IFileCachePriority::Iterator,
         FinalizeEvictionFunc & finalize_eviction_func,
         const CacheGuard::Lock &) override;

From c22e77d8aaade3ca79872c868bb52516bcbce00c Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Wed, 22 Nov 2023 22:54:38 +0100
Subject: [PATCH 022/213] Better

---
 src/Interpreters/Cache/EvictionCandidates.cpp |   5 +-
 .../Cache/LRUFileCachePriority.cpp            |  63 +++---
 src/Interpreters/Cache/LRUFileCachePriority.h |  25 ++-
 .../Cache/SLRUFileCachePriority.cpp           | 183 ++++++++----------
 .../Cache/SLRUFileCachePriority.h             |  23 +--
 5 files changed, 128 insertions(+), 171 deletions(-)

diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp
index 676a02a35da..7dceab4f95f 100644
--- a/src/Interpreters/Cache/EvictionCandidates.cpp
+++ b/src/Interpreters/Cache/EvictionCandidates.cpp
@@ -23,8 +23,9 @@ EvictionCandidates::~EvictionCandidates()
 
 void EvictionCandidates::add(LockedKey & locked_key, const FileSegmentMetadataPtr & candidate)
 {
-    auto it = candidates.emplace(locked_key.getKey(), KeyCandidates{}).first;
-    it->second.key_metadata = locked_key.getKeyMetadata();
+    auto [it, inserted] = candidates.emplace(locked_key.getKey(), KeyCandidates{});
+    if (inserted)
+        it->second.key_metadata = locked_key.getKeyMetadata();
     it->second.candidates.push_back(candidate);
 
     candidate->removal_candidate = true;
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index c42e44f21b1..5cd44a67d89 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -32,11 +32,10 @@ IFileCachePriority::Iterator LRUFileCachePriority::add(
     size_t size,
     const CacheGuard::Lock & lock)
 {
-    auto it = add(Entry(key_metadata->key, offset, size, key_metadata), lock);
-    return std::make_shared<LRUFileCacheIterator>(this, it);
+    return add(Entry(key_metadata->key, offset, size, key_metadata), lock);
 }
 
-LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::add(Entry && entry, const CacheGuard::Lock &)
+std::unique_ptr<LRUFileCachePriority::LRUIterator> LRUFileCachePriority::add(Entry && entry, const CacheGuard::Lock &)
 {
     if (entry.size == 0)
     {
@@ -77,10 +76,10 @@ LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::add(Entry && entry,
         log, "Added entry into LRU queue, key: {}, offset: {}, size: {}",
         entry.key, entry.offset, entry.size);
 
-    return it;
+    return std::make_unique<LRUIterator>(this, it);
 }
 
-LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::remove(LRUQueueIterator it, const CacheGuard::Lock &)
+LRUFileCachePriority::LRUQueue::iterator LRUFileCachePriority::remove(LRUQueue::iterator it, const CacheGuard::Lock &)
 {
     /// If size is 0, entry is invalidated, current_elements_num was already updated.
     if (it->size)
@@ -109,11 +108,8 @@ void LRUFileCachePriority::updateElementsCount(int64_t num)
 }
 
 
-LRUFileCachePriority::LRUFileCacheIterator::LRUFileCacheIterator(
-    LRUFileCachePriority * cache_priority_,
-    LRUFileCachePriority::LRUQueueIterator queue_iter_)
-    : cache_priority(cache_priority_)
-    , queue_iter(queue_iter_)
+LRUFileCachePriority::LRUIterator::LRUIterator(LRUFileCachePriority * cache_priority_, LRUQueue::iterator queue_iter_)
+    : cache_priority(cache_priority_), queue_iter(queue_iter_)
 {
 }
 
@@ -221,28 +217,21 @@ bool LRUFileCachePriority::collectCandidatesForEviction(
     return can_fit();
 }
 
-size_t LRUFileCachePriority::increasePriority(LRUQueueIterator it, const CacheGuard::Lock &)
+std::unique_ptr<LRUFileCachePriority::LRUIterator> LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &)
 {
-    queue.splice(queue.end(), queue, it);
-    return ++it->hits;
-}
-
-LRUFileCachePriority::LRUQueueIterator
-LRUFileCachePriority::move(LRUQueueIterator it, LRUFileCachePriority & other, const CacheGuard::Lock &)
-{
-    const size_t size = it->size;
-    if (size == 0)
+    const auto & entry = it.getEntry();
+    if (entry.size == 0)
     {
         throw Exception(
             ErrorCodes::LOGICAL_ERROR,
             "Adding zero size entries to LRU queue is not allowed "
-            "(key: {}, offset: {})", it->key, it->offset);
+            "(key: {}, offset: {})", entry.key, entry.offset);
     }
 #ifndef NDEBUG
-    for (const auto & entry : queue)
+    for (const auto & queue_entry : queue)
     {
         /// entry.size == 0 means entry was invalidated.
-        if (entry.size != 0 && entry.key == it->key && entry.offset == it->offset)
+        if (queue_entry.size != 0 && queue_entry.key == entry.key && queue_entry.offset == entry.offset)
             throw Exception(
                 ErrorCodes::LOGICAL_ERROR,
                 "Attempt to add duplicate queue entry to queue. "
@@ -251,15 +240,14 @@ LRUFileCachePriority::move(LRUQueueIterator it, LRUFileCachePriority & other, co
     }
 #endif
 
-    queue.splice(queue.end(), other.queue, it);
+    queue.splice(queue.end(), other.queue, it.queue_iter);
 
-    updateSize(size);
+    updateSize(entry.size);
     updateElementsCount(1);
 
-    other.updateSize(-size);
+    other.updateSize(-entry.size);
     other.updateElementsCount(-1);
-
-    return queue.end();
+    return std::make_unique<LRUIterator>(this, it.queue_iter);
 }
 
 FileSegments LRUFileCachePriority::dump(const CacheGuard::Lock & lock)
@@ -273,14 +261,14 @@ FileSegments LRUFileCachePriority::dump(const CacheGuard::Lock & lock)
     return res;
 }
 
-void LRUFileCachePriority::LRUFileCacheIterator::remove(const CacheGuard::Lock & lock)
+void LRUFileCachePriority::LRUIterator::remove(const CacheGuard::Lock & lock)
 {
     checkUsable();
     cache_priority->remove(queue_iter, lock);
-    queue_iter = LRUQueueIterator{};
+    queue_iter = LRUQueue::iterator{};
 }
 
-void LRUFileCachePriority::LRUFileCacheIterator::invalidate()
+void LRUFileCachePriority::LRUIterator::invalidate()
 {
     checkUsable();
 
@@ -294,7 +282,7 @@ void LRUFileCachePriority::LRUFileCacheIterator::invalidate()
     queue_iter->size = 0;
 }
 
-void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size)
+void LRUFileCachePriority::LRUIterator::updateSize(int64_t size)
 {
     checkUsable();
 
@@ -307,21 +295,22 @@ void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size)
     queue_iter->size += size;
 }
 
-size_t LRUFileCachePriority::LRUFileCacheIterator::increasePriority(const CacheGuard::Lock & lock)
+size_t LRUFileCachePriority::LRUIterator::increasePriority(const CacheGuard::Lock &)
 {
     checkUsable();
-    return cache_priority->increasePriority(queue_iter, lock);
+    cache_priority->queue.splice(cache_priority->queue.end(), cache_priority->queue, queue_iter);
+    return ++queue_iter->hits;
 }
 
-void LRUFileCachePriority::LRUFileCacheIterator::checkUsable() const
+void LRUFileCachePriority::LRUIterator::checkUsable() const
 {
-    if (queue_iter == LRUQueueIterator{})
+    if (queue_iter == LRUQueue::iterator{})
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator");
 }
 
 void LRUFileCachePriority::shuffle(const CacheGuard::Lock &)
 {
-    std::vector<LRUQueueIterator> its;
+    std::vector<LRUQueue::iterator> its;
     its.reserve(queue.size());
     for (auto it = queue.begin(); it != queue.end(); ++it)
         its.push_back(it);
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index 99011aca06c..acc8a5a9a76 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -14,9 +14,8 @@ namespace DB
 class LRUFileCachePriority : public IFileCachePriority
 {
 private:
-    class LRUFileCacheIterator;
+    class LRUIterator;
     using LRUQueue = std::list<Entry>;
-    using LRUQueueIterator = typename LRUQueue::iterator;
     friend class SLRUFileCachePriority;
 
 public:
@@ -54,7 +53,10 @@ private:
     /// because of invalidated entries.
     std::atomic<size_t> current_elements_num = 0;
 
-    LRUQueueIterator remove(LRUQueueIterator it, const CacheGuard::Lock &);
+    bool canFit(size_t size, const CacheGuard::Lock &) const;
+    bool canFit(size_t size, size_t released_size_assumption, size_t released_elements_assumption, const CacheGuard::Lock &) const;
+
+    LRUQueue::iterator remove(LRUQueue::iterator it, const CacheGuard::Lock &);
 
     enum class IterationResult
     {
@@ -65,19 +67,16 @@ private:
     using IterateFunc = std::function<IterationResult(LockedKey &, const FileSegmentMetadataPtr &)>;
     void iterate(IterateFunc && func, const CacheGuard::Lock &);
 
-    size_t increasePriority(LRUQueueIterator it, const CacheGuard::Lock &);
-    LRUQueueIterator move(LRUQueueIterator it, LRUFileCachePriority & other, const CacheGuard::Lock &);
-    LRUQueueIterator add(Entry && entry, const CacheGuard::Lock &);
-    bool canFit(size_t size, const CacheGuard::Lock &) const;
-    bool canFit(size_t size, size_t released_size_assumption, size_t released_elements_assumption, const CacheGuard::Lock &) const;
+    std::unique_ptr<LRUIterator> move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &);
+    std::unique_ptr<LRUIterator> add(Entry && entry, const CacheGuard::Lock &);
 };
 
-class LRUFileCachePriority::LRUFileCacheIterator : public IFileCachePriority::IIterator
+class LRUFileCachePriority::LRUIterator : public IFileCachePriority::IIterator
 {
+    friend class LRUFileCachePriority;
+    friend class SLRUFileCachePriority;
 public:
-    LRUFileCacheIterator(
-        LRUFileCachePriority * cache_priority_,
-        LRUFileCachePriority::LRUQueueIterator queue_iter_);
+    LRUIterator(LRUFileCachePriority * cache_priority_, LRUQueue::iterator queue_iter_);
 
     const Entry & getEntry() const override { return *queue_iter; }
 
@@ -93,7 +92,7 @@ private:
     void checkUsable() const;
 
     LRUFileCachePriority * cache_priority;
-    mutable LRUFileCachePriority::LRUQueueIterator queue_iter;
+    mutable LRUQueue::iterator queue_iter;
 };
 
 }
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index 12119c23ce6..ee8da79ee6f 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -58,32 +58,8 @@ IFileCachePriority::Iterator SLRUFileCachePriority::add(
     size_t size,
     const CacheGuard::Lock & lock)
 {
-    return probationary_queue.add(key_metadata, offset, size, lock);
-}
-
-SLRUFileCachePriority::SLRUQueueIterator
-SLRUFileCachePriority::remove(SLRUQueueIterator it, bool is_protected, const CacheGuard::Lock & lock)
-{
-    if (is_protected)
-        return protected_queue.remove(it, lock);
-    else
-        return probationary_queue.remove(it, lock);
-}
-
-void SLRUFileCachePriority::updateSize(int64_t size, bool is_protected)
-{
-    if (is_protected)
-        protected_queue.updateSize(size);
-    else
-        probationary_queue.updateSize(size);
-}
-
-void SLRUFileCachePriority::updateElementsCount(int64_t num, bool is_protected)
-{
-    if (is_protected)
-        protected_queue.updateElementsCount(num);
-    else
-        probationary_queue.updateElementsCount(num);
+    auto it = probationary_queue.add(Entry(key_metadata->key, offset, size, key_metadata), lock);
+    return std::make_shared<SLRUIterator>(this, std::move(it), false);
 }
 
 bool SLRUFileCachePriority::collectCandidatesForEviction(
@@ -105,7 +81,7 @@ bool SLRUFileCachePriority::collectCandidatesForEviction(
     /// If `it` not nullptr (e.g. is already in some queue),
     /// we need to check in which queue (protected/probationary) it currently is
     /// (in order to know where we need to free space).
-    if (!assert_cast<SLRUFileCacheIterator *>(it.get())->is_protected)
+    if (!assert_cast<SLRUIterator *>(it.get())->is_protected)
     {
         return probationary_queue.collectCandidatesForEviction(size, stat, res, it, finalize_eviction_func, lock);
     }
@@ -139,8 +115,9 @@ bool SLRUFileCachePriority::collectCandidatesForEviction(
         {
             for (const auto & candidate : key_candidates.candidates)
             {
-                auto * candidate_it = assert_cast<SLRUFileCacheIterator *>(candidate->getQueueIterator().get());
-                probationary_queue.move(candidate_it->queue_iter, protected_queue, lk);
+                auto * candidate_it = assert_cast<SLRUIterator *>(candidate->getQueueIterator().get());
+                candidate_it->lru_iterator = probationary_queue.move(*candidate_it->lru_iterator, protected_queue, lk);
+                candidate_it->is_protected = false;
             }
         }
     };
@@ -148,27 +125,30 @@ bool SLRUFileCachePriority::collectCandidatesForEviction(
     return true;
 }
 
-SLRUFileCachePriority::SLRUQueueIterator
-SLRUFileCachePriority::increasePriority(SLRUQueueIterator & it, bool is_protected, const CacheGuard::Lock & lock)
+void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const CacheGuard::Lock & lock)
 {
+    auto & lru_it = iterator.lru_iterator;
+    const bool is_protected = iterator.is_protected;
+    const auto & entry = lru_it->getEntry();
+
     /// If entry (`it` is the pointer to the entry) is already in protected queue,
     /// we only need to increase its priority within the protected queue.
     if (is_protected)
     {
-        protected_queue.increasePriority(it, lock);
-        return it;
+        lru_it->increasePriority(lock);
+        return;
     }
 
     /// Entry is in probationary queue.
     /// We need to move it to protected queue.
 
-    if (it->size > protected_queue.getSizeLimit())
+    if (entry.size > protected_queue.getSizeLimit())
     {
         /// Entry size is bigger than the whole protected queue limit.
         /// This is only possible if protected_queue_size_limit is less than max_file_segment_size,
         /// which is not possible in any realistic cache configuration.
-        probationary_queue.increasePriority(it, lock);
-        return it;
+        lru_it->increasePriority(lock);
+        return;
     }
 
     /// Check if there is enough space in protected queue to move entry there.
@@ -180,21 +160,21 @@ SLRUFileCachePriority::increasePriority(SLRUQueueIterator & it, bool is_protecte
     FileCacheReserveStat downgrade_stat;
     FinalizeEvictionFunc noop;
 
-    if (!protected_queue.collectCandidatesForEviction(it->size, downgrade_stat, downgrade_candidates, {}, noop, lock))
+    if (!protected_queue.collectCandidatesForEviction(entry.size, downgrade_stat, downgrade_candidates, {}, noop, lock))
     {
         /// We cannot make space for entry to be moved to protected queue
         /// (not enough releasable file segments).
         /// Then just increase its priority within probationary queue.
-        probationary_queue.increasePriority(it, lock);
-        return it;
+        lru_it->increasePriority(lock);
+        return;
     }
 
     /// Now we need to check if those "downgrade" candidates can actually
     /// be moved to probationary queue.
     const size_t size_to_downgrade = downgrade_stat.stat.releasable_count;
     size_t size_to_free = 0;
-    if (size_to_downgrade && size_to_downgrade > it->size)
-        size_to_free = size_to_downgrade - it->size;
+    if (size_to_downgrade && size_to_downgrade > entry.size)
+        size_to_free = size_to_downgrade - entry.size;
 
     EvictionCandidates eviction_candidates;
     FileCacheReserveStat stat;
@@ -205,8 +185,8 @@ SLRUFileCachePriority::increasePriority(SLRUQueueIterator & it, bool is_protecte
         /// "downgrade" canidates cannot be moved to probationary queue,
         /// so entry cannot be moved to protected queue as well.
         /// Then just increase its priority within probationary queue.
-        probationary_queue.increasePriority(it, lock);
-        return it;
+        lru_it->increasePriority(lock);
+        return;
     }
 
     /// Make space for "downgrade" candidates.
@@ -214,19 +194,21 @@ SLRUFileCachePriority::increasePriority(SLRUQueueIterator & it, bool is_protecte
 
     /// All checks passed, now we can move downgrade candidates to
     /// probationary queue and our entry to protected queue.
-    Entry entry = *it;
-    probationary_queue.remove(it, lock);
+    Entry entry_copy = lru_it->getEntry();
+    lru_it->remove(lock);
 
     for (const auto & [key, key_candidates] : downgrade_candidates)
     {
         for (const auto & candidate : key_candidates.candidates)
         {
-            auto * candidate_it = assert_cast<SLRUFileCacheIterator *>(candidate->getQueueIterator().get());
-            probationary_queue.move(candidate_it->queue_iter, protected_queue, lock);
+            auto * candidate_it = assert_cast<SLRUIterator *>(candidate->getQueueIterator().get());
+            candidate_it->lru_iterator = probationary_queue.move(*candidate_it->lru_iterator, protected_queue, lock);
+            candidate_it->is_protected = false;
         }
     }
 
-    return protected_queue.add(std::move(entry), lock);
+    iterator.lru_iterator = protected_queue.add(std::move(entry_copy), lock);
+    iterator.is_protected = true;
 }
 
 FileSegments SLRUFileCachePriority::dump(const CacheGuard::Lock & lock)
@@ -237,67 +219,58 @@ FileSegments SLRUFileCachePriority::dump(const CacheGuard::Lock & lock)
     return res;
 }
 
-SLRUFileCachePriority::SLRUFileCacheIterator::SLRUFileCacheIterator(
-    SLRUFileCachePriority * cache_priority_,
-    SLRUFileCachePriority::SLRUQueueIterator queue_iter_,
-    bool is_protected_)
-    : cache_priority(cache_priority_)
-    , queue_iter(queue_iter_)
-    , is_protected(is_protected_)
-{
-}
-
-void SLRUFileCachePriority::SLRUFileCacheIterator::remove(const CacheGuard::Lock & lock)
-{
-    checkUsable();
-    cache_priority->remove(queue_iter, is_protected, lock);
-    queue_iter = SLRUQueueIterator{};
-}
-
-void SLRUFileCachePriority::SLRUFileCacheIterator::invalidate()
-{
-    checkUsable();
-
-    LOG_TEST(
-        cache_priority->log,
-        "Invalidating entry in SLRU queue. Key: {}, offset: {}, previous size: {}",
-        queue_iter->key, queue_iter->offset, queue_iter->size);
-
-    cache_priority->updateSize(-queue_iter->size, is_protected);
-    cache_priority->updateElementsCount(-1, is_protected);
-    queue_iter->size = 0;
-}
-
-void SLRUFileCachePriority::SLRUFileCacheIterator::updateSize(int64_t size)
-{
-    checkUsable();
-
-    LOG_TEST(
-        cache_priority->log,
-        "Update size with {} in SLRU queue for key: {}, offset: {}, previous size: {}",
-        size, queue_iter->key, queue_iter->offset, queue_iter->size);
-
-    cache_priority->updateSize(size, is_protected);
-    queue_iter->size += size;
-}
-
-size_t SLRUFileCachePriority::SLRUFileCacheIterator::increasePriority(const CacheGuard::Lock & lock)
-{
-    checkUsable();
-    queue_iter = cache_priority->increasePriority(queue_iter, is_protected, lock);
-    return ++queue_iter->hits;
-}
-
-void SLRUFileCachePriority::SLRUFileCacheIterator::checkUsable() const
-{
-    if (queue_iter == SLRUQueueIterator{})
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator");
-}
-
 void SLRUFileCachePriority::shuffle(const CacheGuard::Lock & lock)
 {
     protected_queue.shuffle(lock);
     probationary_queue.shuffle(lock);
 }
 
+SLRUFileCachePriority::SLRUIterator::SLRUIterator(
+    SLRUFileCachePriority * cache_priority_,
+    std::unique_ptr<LRUFileCachePriority::LRUIterator> lru_iterator_,
+    bool is_protected_)
+    : cache_priority(cache_priority_)
+    , lru_iterator(std::move(lru_iterator_))
+    , is_protected(is_protected_)
+{
+}
+
+const SLRUFileCachePriority::Entry & SLRUFileCachePriority::SLRUIterator::getEntry() const
+{
+    checkUsable();
+    return lru_iterator->getEntry();
+}
+
+size_t SLRUFileCachePriority::SLRUIterator::increasePriority(const CacheGuard::Lock & lock)
+{
+    checkUsable();
+    cache_priority->increasePriority(*this, lock);
+    return getEntry().hits;
+}
+
+void SLRUFileCachePriority::SLRUIterator::updateSize(int64_t size)
+{
+    checkUsable();
+    lru_iterator->updateSize(size);
+}
+
+void SLRUFileCachePriority::SLRUIterator::invalidate()
+{
+    checkUsable();
+    lru_iterator->invalidate();
+}
+
+void SLRUFileCachePriority::SLRUIterator::remove(const CacheGuard::Lock & lock)
+{
+    checkUsable();
+    lru_iterator->remove(lock);
+    lru_iterator = nullptr;
+}
+
+void SLRUFileCachePriority::SLRUIterator::checkUsable() const
+{
+    if (!lru_iterator)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator");
+}
+
 }
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
index a179a5285b2..079bc342d1b 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.h
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -20,9 +20,9 @@ namespace DB
 class SLRUFileCachePriority : public IFileCachePriority
 {
 private:
-    class SLRUFileCacheIterator;
+    class SLRUIterator;
+    using LRUIterator = LRUFileCachePriority::LRUIterator;
     using LRUQueue = std::list<Entry>;
-    using SLRUQueueIterator = typename LRUQueue::iterator;
 
 public:
     SLRUFileCachePriority(size_t max_size_, size_t max_elements_, double size_ratio);
@@ -46,28 +46,23 @@ public:
     FileSegments dump(const CacheGuard::Lock &) override;
 
 private:
-    void updateElementsCount(int64_t num, bool is_protected);
-    void updateSize(int64_t size, bool is_protected);
-
     LRUFileCachePriority protected_queue;
     LRUFileCachePriority probationary_queue;
-
     Poco::Logger * log = &Poco::Logger::get("SLRUFileCachePriority");
 
-    SLRUQueueIterator remove(SLRUQueueIterator it, bool is_protected, const CacheGuard::Lock & lock);
-    SLRUQueueIterator increasePriority(SLRUQueueIterator & it, bool is_protected, const CacheGuard::Lock & lock);
+    void increasePriority(SLRUIterator & iterator, const CacheGuard::Lock & lock);
 };
 
-class SLRUFileCachePriority::SLRUFileCacheIterator : public IFileCachePriority::IIterator
+class SLRUFileCachePriority::SLRUIterator : public IFileCachePriority::IIterator
 {
     friend class SLRUFileCachePriority;
 public:
-    SLRUFileCacheIterator(
+    SLRUIterator(
         SLRUFileCachePriority * cache_priority_,
-        SLRUFileCachePriority::SLRUQueueIterator queue_iter_,
+        std::unique_ptr<LRUIterator> lru_iterator_,
         bool is_protected_);
 
-    const Entry & getEntry() const override { return *queue_iter; }
+    const Entry & getEntry() const override;
 
     size_t increasePriority(const CacheGuard::Lock &) override;
 
@@ -81,8 +76,8 @@ private:
     void checkUsable() const;
 
     SLRUFileCachePriority * cache_priority;
-    mutable SLRUFileCachePriority::SLRUQueueIterator queue_iter;
-    const bool is_protected;
+    mutable std::unique_ptr<LRUIterator> lru_iterator;
+    bool is_protected;
 };
 
 }

From b5eff71959fd1e6873f7686a69bab48933d51e9d Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Thu, 23 Nov 2023 11:18:56 +0100
Subject: [PATCH 023/213] Add a test

---
 src/Interpreters/Cache/FileSegment.cpp        |   3 +-
 .../Cache/SLRUFileCachePriority.h             |   5 +-
 ...lru_file_cache.cpp => gtest_filecache.cpp} | 155 +++++++++++++++++-
 3 files changed, 158 insertions(+), 5 deletions(-)
 rename src/Interpreters/tests/{gtest_lru_file_cache.cpp => gtest_filecache.cpp} (86%)

diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp
index 8216d7a9a81..10c277772be 100644
--- a/src/Interpreters/Cache/FileSegment.cpp
+++ b/src/Interpreters/Cache/FileSegment.cpp
@@ -844,7 +844,8 @@ FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment)
         file_segment->offset(),
         file_segment->range().size(),
         State::DETACHED,
-        CreateFileSegmentSettings(file_segment->getKind(), file_segment->is_unbound));
+        CreateFileSegmentSettings(file_segment->getKind(), file_segment->is_unbound),
+        false, file_segment->cache, file_segment->key_metadata, file_segment->queue_iterator);
 
     snapshot->hits_count = file_segment->getHitsCount();
     snapshot->downloaded_size = file_segment->getDownloadedSize();
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
index 079bc342d1b..f03a145c533 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.h
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -20,11 +20,12 @@ namespace DB
 class SLRUFileCachePriority : public IFileCachePriority
 {
 private:
-    class SLRUIterator;
     using LRUIterator = LRUFileCachePriority::LRUIterator;
     using LRUQueue = std::list<Entry>;
 
 public:
+    class SLRUIterator;
+
     SLRUFileCachePriority(size_t max_size_, size_t max_elements_, double size_ratio);
 
     size_t getSize(const CacheGuard::Lock & lock) const override;
@@ -72,6 +73,8 @@ public:
 
     void updateSize(int64_t size) override;
 
+    bool isProtected() const { return is_protected; }
+
 private:
     void checkUsable() const;
 
diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_filecache.cpp
similarity index 86%
rename from src/Interpreters/tests/gtest_lru_file_cache.cpp
rename to src/Interpreters/tests/gtest_filecache.cpp
index ab2a128de34..f9a18afa618 100644
--- a/src/Interpreters/tests/gtest_lru_file_cache.cpp
+++ b/src/Interpreters/tests/gtest_filecache.cpp
@@ -19,6 +19,7 @@
 #include <Interpreters/Cache/FileCache.h>
 #include <Interpreters/Cache/FileCacheSettings.h>
 #include <Interpreters/Cache/FileSegment.h>
+#include <Interpreters/Cache/SLRUFileCachePriority.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/TemporaryDataOnDisk.h>
 #include <base/hex.h>
@@ -124,6 +125,42 @@ void assertEqual(const FileSegmentsHolderPtr & file_segments, const Ranges & exp
     assertEqual(file_segments->begin(), file_segments->end(), file_segments->size(), expected_ranges, expected_states);
 }
 
+void assertProtectedOrProbationary(const FileSegments & file_segments, const Ranges & expected, bool assert_protected)
+{
+    std::cerr << "File segments: ";
+    std::vector<Range> res;
+    for (const auto & f : file_segments)
+    {
+        std::cerr << f->range().toString() << ", ";
+        if (auto it = f->getQueueIterator())
+        {
+            if (auto * slru_it = dynamic_cast<SLRUFileCachePriority::SLRUIterator *>(it.get()))
+            {
+                if ((slru_it->isProtected() && assert_protected) || (!slru_it->isProtected() && !assert_protected))
+                {
+                    res.push_back(f->range());
+                }
+            }
+        }
+    }
+
+    ASSERT_EQ(res.size(), expected.size());
+    for (size_t i = 0; i < res.size(); ++i)
+    {
+        ASSERT_EQ(res[i], expected[i]);
+    }
+}
+
+void assertProtected(const FileSegments & file_segments, const Ranges & expected)
+{
+    assertProtectedOrProbationary(file_segments, expected, true);
+}
+
+void assertProbationary(const FileSegments & file_segments, const Ranges & expected)
+{
+    assertProtectedOrProbationary(file_segments, expected, false);
+}
+
 FileSegment & get(const HolderPtr & holder, int i)
 {
     auto it = std::next(holder->begin(), i);
@@ -134,7 +171,7 @@ FileSegment & get(const HolderPtr & holder, int i)
 
 void download(FileSegment & file_segment)
 {
-    std::cerr << "Downloading range " << file_segment.range().toString() << "\n";
+    std::cerr << "\nDownloading range " << file_segment.range().toString() << "\n";
 
     ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId());
     ASSERT_EQ(file_segment.state(), State::DOWNLOADING);
@@ -167,7 +204,7 @@ void download(const HolderPtr & holder)
 void increasePriority(const HolderPtr & holder)
 {
     for (auto & it : *holder)
-        it->use();
+        it->increasePriority();
 }
 
 class FileCacheTest : public ::testing::Test
@@ -216,7 +253,7 @@ public:
     pcg64 rng;
 };
 
-TEST_F(FileCacheTest, get)
+TEST_F(FileCacheTest, LRUPolicy)
 {
     DB::ThreadStatus thread_status;
 
@@ -1040,3 +1077,115 @@ TEST_F(FileCacheTest, TemporaryDataReadBufferSize)
         ASSERT_EQ(stream.getSize(), 62);
     }
 }
+
+TEST_F(FileCacheTest, SLRUPolicy)
+{
+    DB::ThreadStatus thread_status;
+    std::string query_id = "query_id"; /// To work with cache need query_id and query context.
+
+    Poco::XML::DOMParser dom_parser;
+    std::string xml(R"CONFIG(<clickhouse>
+</clickhouse>)CONFIG");
+    Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
+    Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
+    getMutableContext().context->setConfig(config);
+
+    auto query_context = DB::Context::createCopy(getContext().context);
+    query_context->makeQueryContext();
+    query_context->setCurrentQueryId(query_id);
+    chassert(&DB::CurrentThread::get() == &thread_status);
+    DB::CurrentThread::QueryScope query_scope_holder(query_context);
+
+    DB::FileCacheSettings settings;
+    settings.base_path = cache_base_path;
+    settings.max_size = 40;
+    settings.max_elements = 6;
+    settings.boundary_alignment = 1;
+
+    settings.cache_policy = "SLRU";
+    settings.slru_size_ratio = 0.5;
+
+    const size_t file_size = -1; // the value doesn't really matter because boundary_alignment == 1.
+    size_t file_cache_name = 0;
+
+    {
+        auto cache = DB::FileCache(std::to_string(++file_cache_name), settings);
+        cache.initialize();
+        auto key = cache.createKeyForPath("key1");
+
+        auto add_range = [&](size_t offset, size_t size)
+        {
+            std::cerr << "Add [" << offset << ", " << offset + size - 1 << "]" << std::endl;
+
+            auto holder = cache.getOrSet(key, offset, size, file_size, {});
+            assertEqual(holder, { Range(offset, offset + size - 1) }, { State::EMPTY });
+            download(holder->front());
+            assertEqual(holder, { Range(offset, offset + size - 1) }, { State::DOWNLOADED });
+        };
+
+        auto check_covering_range = [&](size_t offset, size_t size, Ranges covering_ranges)
+        {
+            auto holder = cache.getOrSet(key, offset, size, file_size, {});
+            std::vector<State> states(covering_ranges.size(), State::DOWNLOADED);
+            assertEqual(holder, covering_ranges, states);
+            increasePriority(holder);
+        };
+
+        add_range(0, 10);
+        add_range(10, 5);
+
+        assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14) });
+        assertEqual(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
+
+        ASSERT_EQ(cache.getFileSegmentsNum(), 2);
+        ASSERT_EQ(cache.getUsedCacheSize(), 15);
+
+        assertProbationary(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
+        assertProtected(cache.dumpQueue(), Ranges{});
+
+        check_covering_range(9, 1, { Range(0, 9) });
+        assertEqual(cache.dumpQueue(), { Range(10, 14), Range(0, 9) });
+
+        check_covering_range(10, 1, { Range(10, 14) });
+        assertEqual(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
+
+        assertProbationary(cache.dumpQueue(), Ranges{});
+        assertProtected(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
+
+        add_range(17, 4);
+        assertEqual(cache.dumpQueue(), { Range(17, 20), Range(0, 9), Range(10, 14) });
+
+        add_range(24, 3);
+        assertEqual(cache.dumpQueue(), { Range(17, 20), Range(24, 26), Range(0, 9), Range(10, 14) });
+
+        add_range(27, 1);
+        assertEqual(cache.dumpQueue(), { Range(17, 20), Range(24, 26), Range(27, 27), Range(0, 9), Range(10, 14) });
+
+        assertProbationary(cache.dumpQueue(), { Range(17, 20), Range(24, 26), Range(27, 27) });
+        assertProtected(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
+
+        assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14), Range(17, 20), Range(24, 26), Range(27, 27) });
+        ASSERT_EQ(cache.getFileSegmentsNum(), 5);
+        ASSERT_EQ(cache.getUsedCacheSize(), 23);
+
+        add_range(28, 3);
+        assertEqual(cache.dumpQueue(), { Range(24, 26), Range(27, 27), Range(28, 30), Range(0, 9), Range(10, 14) });
+
+        assertProbationary(cache.dumpQueue(), { Range(24, 26), Range(27, 27), Range(28, 30) });
+        assertProtected(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
+
+        check_covering_range(4, 1, { Range(0, 9) });
+
+        assertProbationary(cache.dumpQueue(), { Range(24, 26), Range(27, 27), Range(28, 30) });
+        assertProtected(cache.dumpQueue(), { Range(10, 14), Range(0, 9) });
+
+        check_covering_range(27, 3, { Range(27, 27), Range(28, 30) });
+
+        assertProbationary(cache.dumpQueue(), { Range(24, 26), Range(10, 14) });
+        assertProtected(cache.dumpQueue(), { Range(0, 9), Range(27, 27), Range(28, 30) });
+
+        assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14), Range(24, 26), Range(27, 27), Range(28, 30) });
+        ASSERT_EQ(cache.getFileSegmentsNum(), 5);
+        ASSERT_EQ(cache.getUsedCacheSize(), 22);
+    }
+}

From dc5e5f3b2046e14a2cce429c389cc5b49a98a99c Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Thu, 23 Nov 2023 13:16:04 +0100
Subject: [PATCH 024/213] Refactoring

---
 src/Interpreters/Cache/FileCache.cpp          |  2 +-
 src/Interpreters/Cache/FileCache.h            |  3 +-
 src/Interpreters/Cache/FileSegment.cpp        | 12 ++-
 src/Interpreters/Cache/FileSegment.h          |  8 +-
 src/Interpreters/Cache/IFileCachePriority.h   | 13 +--
 .../Cache/LRUFileCachePriority.cpp            | 59 ++++++------
 src/Interpreters/Cache/LRUFileCachePriority.h | 16 ++--
 src/Interpreters/Cache/Metadata.h             |  2 +-
 src/Interpreters/Cache/QueryLimit.cpp         |  2 +-
 src/Interpreters/Cache/QueryLimit.h           |  5 +-
 .../Cache/SLRUFileCachePriority.cpp           | 91 +++++++++----------
 .../Cache/SLRUFileCachePriority.h             |  8 +-
 12 files changed, 112 insertions(+), 109 deletions(-)

diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index b47e84895c5..c29f01745e4 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -1046,7 +1046,7 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir)
             }
 
             bool limits_satisfied;
-            IFileCachePriority::Iterator cache_it;
+            IFileCachePriority::IteratorPtr cache_it;
             {
                 auto lock = lockCache();
                 limits_satisfied = (size_limit == 0 || main_priority->getSize(lock) + size <= size_limit)
diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h
index 879ee2c133b..493cde9652a 100644
--- a/src/Interpreters/Cache/FileCache.h
+++ b/src/Interpreters/Cache/FileCache.h
@@ -74,7 +74,6 @@ public:
     using QueryLimit = DB::FileCacheQueryLimit;
     using Priority = IFileCachePriority;
     using PriorityEntry = IFileCachePriority::Entry;
-    using PriorityIterator = IFileCachePriority::Iterator;
 
     FileCache(const std::string & cache_name, const FileCacheSettings & settings);
 
@@ -205,7 +204,7 @@ private:
         const size_t queue_size;
 
         std::unique_ptr<LRUFileCachePriority> queue;
-        using Records = std::unordered_map<KeyAndOffset, PriorityIterator, FileCacheKeyAndOffsetHash>;
+        using Records = std::unordered_map<KeyAndOffset, Priority::IteratorPtr, FileCacheKeyAndOffsetHash>;
         Records records;
     };
 
diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp
index de23406cf4a..3ad76131fcc 100644
--- a/src/Interpreters/Cache/FileSegment.cpp
+++ b/src/Interpreters/Cache/FileSegment.cpp
@@ -54,7 +54,7 @@ FileSegment::FileSegment(
         bool background_download_enabled_,
         FileCache * cache_,
         std::weak_ptr<KeyMetadata> key_metadata_,
-        Priority::Iterator queue_iterator_)
+        Priority::IteratorPtr queue_iterator_)
     : file_key(key_)
     , segment_range(offset_, offset_ + size_ - 1)
     , segment_kind(settings.kind)
@@ -146,13 +146,13 @@ size_t FileSegment::getReservedSize() const
     return reserved_size;
 }
 
-FileSegment::Priority::Iterator FileSegment::getQueueIterator() const
+FileSegment::Priority::IteratorPtr FileSegment::getQueueIterator() const
 {
     auto lock = lockFileSegment();
     return queue_iterator;
 }
 
-void FileSegment::setQueueIterator(Priority::Iterator iterator)
+void FileSegment::setQueueIterator(Priority::IteratorPtr iterator)
 {
     auto lock = lockFileSegment();
     if (queue_iterator)
@@ -775,7 +775,7 @@ bool FileSegment::assertCorrectness() const
 
 bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock &) const
 {
-    auto check_iterator = [this](const Priority::Iterator & it)
+    auto check_iterator = [this](const Priority::IteratorPtr & it)
     {
         UNUSED(this);
         if (!it)
@@ -917,6 +917,10 @@ void FileSegment::increasePriority()
         return;
     }
 
+    /// Priority can be increased only for downloaded file segments.
+    if (download_state != State::DOWNLOADED)
+        return;
+
     auto it = getQueueIterator();
     if (it)
     {
diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h
index 59008f2e9c9..42b8f10c943 100644
--- a/src/Interpreters/Cache/FileSegment.h
+++ b/src/Interpreters/Cache/FileSegment.h
@@ -113,7 +113,7 @@ public:
         bool background_download_enabled_ = false,
         FileCache * cache_ = nullptr,
         std::weak_ptr<KeyMetadata> key_metadata_ = std::weak_ptr<KeyMetadata>(),
-        Priority::Iterator queue_iterator_ = Priority::Iterator{});
+        Priority::IteratorPtr queue_iterator_ = nullptr);
 
     ~FileSegment() = default;
 
@@ -219,9 +219,9 @@ public:
 
     FileSegmentGuard::Lock lock() const { return segment_guard.lock(); }
 
-    Priority::Iterator getQueueIterator() const;
+    Priority::IteratorPtr getQueueIterator() const;
 
-    void setQueueIterator(Priority::Iterator iterator);
+    void setQueueIterator(Priority::IteratorPtr iterator);
 
     KeyMetadataPtr tryGetKeyMetadata() const;
 
@@ -309,7 +309,7 @@ private:
 
     mutable FileSegmentGuard segment_guard;
     std::weak_ptr<KeyMetadata> key_metadata;
-    mutable Priority::Iterator queue_iterator; /// Iterator is put here on first reservation attempt, if successful.
+    mutable Priority::IteratorPtr queue_iterator; /// Iterator is put here on first reservation attempt, if successful.
     FileCache * cache;
     std::condition_variable cv;
 
diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h
index 1703ed09139..fe5de21ec48 100644
--- a/src/Interpreters/Cache/IFileCachePriority.h
+++ b/src/Interpreters/Cache/IFileCachePriority.h
@@ -12,12 +12,10 @@ namespace DB
 struct FileCacheReserveStat;
 class EvictionCandidates;
 
-/// IFileCachePriority is used to maintain the priority of cached data.
 class IFileCachePriority : private boost::noncopyable
 {
 public:
     using Key = FileCacheKey;
-    using KeyAndOffset = FileCacheKeyAndOffset;
 
     struct Entry
     {
@@ -32,10 +30,10 @@ public:
         size_t hits = 0;
     };
 
-    class IIterator
+    class Iterator
     {
     public:
-        virtual ~IIterator() = default;
+        virtual ~Iterator() = default;
 
         virtual const Entry & getEntry() const = 0;
 
@@ -47,7 +45,7 @@ public:
 
         virtual void invalidate() = 0;
     };
-    using Iterator = std::shared_ptr<IIterator>;
+    using IteratorPtr = std::shared_ptr<Iterator>;
 
     IFileCachePriority(size_t max_size_, size_t max_elements_);
 
@@ -61,19 +59,18 @@ public:
 
     virtual size_t getElementsCount(const CacheGuard::Lock &) const = 0;
 
-    virtual Iterator add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) = 0;
+    virtual IteratorPtr add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) = 0;
 
     virtual void shuffle(const CacheGuard::Lock &) = 0;
 
     virtual FileSegments dump(const CacheGuard::Lock &) = 0;
 
     using FinalizeEvictionFunc = std::function<void(const CacheGuard::Lock & lk)>;
-
     virtual bool collectCandidatesForEviction(
         size_t size,
         FileCacheReserveStat & stat,
         EvictionCandidates & res,
-        IFileCachePriority::Iterator it,
+        IFileCachePriority::IteratorPtr reservee,
         FinalizeEvictionFunc & finalize_eviction_func,
         const CacheGuard::Lock &) = 0;
 
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index 5cd44a67d89..625be890cd3 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -26,7 +26,7 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
-IFileCachePriority::Iterator LRUFileCachePriority::add(
+IFileCachePriority::IteratorPtr LRUFileCachePriority::add(
     KeyMetadataPtr key_metadata,
     size_t offset,
     size_t size,
@@ -67,7 +67,7 @@ std::unique_ptr<LRUFileCachePriority::LRUIterator> LRUFileCachePriority::add(Ent
             entry.key, entry.offset, entry.size, current_size, size_limit);
     }
 
-    auto it = queue.insert(queue.end(), entry);
+    auto iterator = queue.insert(queue.end(), entry);
 
     updateSize(entry.size);
     updateElementsCount(1);
@@ -76,7 +76,7 @@ std::unique_ptr<LRUFileCachePriority::LRUIterator> LRUFileCachePriority::add(Ent
         log, "Added entry into LRU queue, key: {}, offset: {}, size: {}",
         entry.key, entry.offset, entry.size);
 
-    return std::make_unique<LRUIterator>(this, it);
+    return std::make_unique<LRUIterator>(this, iterator);
 }
 
 LRUFileCachePriority::LRUQueue::iterator LRUFileCachePriority::remove(LRUQueue::iterator it, const CacheGuard::Lock &)
@@ -107,9 +107,11 @@ void LRUFileCachePriority::updateElementsCount(int64_t num)
     CurrentMetrics::add(CurrentMetrics::FilesystemCacheElements, num);
 }
 
-
-LRUFileCachePriority::LRUIterator::LRUIterator(LRUFileCachePriority * cache_priority_, LRUQueue::iterator queue_iter_)
-    : cache_priority(cache_priority_), queue_iter(queue_iter_)
+LRUFileCachePriority::LRUIterator::LRUIterator(
+    LRUFileCachePriority * cache_priority_,
+    LRUQueue::iterator iterator_)
+    : cache_priority(cache_priority_)
+    , iterator(iterator_)
 {
 }
 
@@ -166,7 +168,11 @@ bool LRUFileCachePriority::canFit(size_t size, const CacheGuard::Lock & lock) co
     return canFit(size, 0, 0, lock);
 }
 
-bool LRUFileCachePriority::canFit(size_t size, size_t released_size_assumption, size_t released_elements_assumption, const CacheGuard::Lock &) const
+bool LRUFileCachePriority::canFit(
+    size_t size,
+    size_t released_size_assumption,
+    size_t released_elements_assumption,
+    const CacheGuard::Lock &) const
 {
     return (max_size == 0 || (current_size + size - released_size_assumption <= max_size))
         && (max_elements == 0 || current_elements_num + 1 - released_elements_assumption <= max_elements);
@@ -176,7 +182,7 @@ bool LRUFileCachePriority::collectCandidatesForEviction(
     size_t size,
     FileCacheReserveStat & stat,
     EvictionCandidates & res,
-    IFileCachePriority::Iterator,
+    IFileCachePriority::IteratorPtr,
     FinalizeEvictionFunc &,
     const CacheGuard::Lock & lock)
 {
@@ -217,7 +223,8 @@ bool LRUFileCachePriority::collectCandidatesForEviction(
     return can_fit();
 }
 
-std::unique_ptr<LRUFileCachePriority::LRUIterator> LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &)
+std::unique_ptr<LRUFileCachePriority::LRUIterator>
+LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &)
 {
     const auto & entry = it.getEntry();
     if (entry.size == 0)
@@ -240,14 +247,14 @@ std::unique_ptr<LRUFileCachePriority::LRUIterator> LRUFileCachePriority::move(LR
     }
 #endif
 
-    queue.splice(queue.end(), other.queue, it.queue_iter);
+    queue.splice(queue.end(), other.queue, it.iterator);
 
     updateSize(entry.size);
     updateElementsCount(1);
 
     other.updateSize(-entry.size);
     other.updateElementsCount(-1);
-    return std::make_unique<LRUIterator>(this, it.queue_iter);
+    return std::make_unique<LRUIterator>(this, it.iterator);
 }
 
 FileSegments LRUFileCachePriority::dump(const CacheGuard::Lock & lock)
@@ -263,48 +270,48 @@ FileSegments LRUFileCachePriority::dump(const CacheGuard::Lock & lock)
 
 void LRUFileCachePriority::LRUIterator::remove(const CacheGuard::Lock & lock)
 {
-    checkUsable();
-    cache_priority->remove(queue_iter, lock);
-    queue_iter = LRUQueue::iterator{};
+    assertValid();
+    cache_priority->remove(iterator, lock);
+    iterator = LRUQueue::iterator{};
 }
 
 void LRUFileCachePriority::LRUIterator::invalidate()
 {
-    checkUsable();
+    assertValid();
 
     LOG_TEST(
         cache_priority->log,
         "Invalidating entry in LRU queue. Key: {}, offset: {}, previous size: {}",
-        queue_iter->key, queue_iter->offset, queue_iter->size);
+        iterator->key, iterator->offset, iterator->size);
 
-    cache_priority->updateSize(-queue_iter->size);
+    cache_priority->updateSize(-iterator->size);
     cache_priority->updateElementsCount(-1);
-    queue_iter->size = 0;
+    iterator->size = 0;
 }
 
 void LRUFileCachePriority::LRUIterator::updateSize(int64_t size)
 {
-    checkUsable();
+    assertValid();
 
     LOG_TEST(
         cache_priority->log,
         "Update size with {} in LRU queue for key: {}, offset: {}, previous size: {}",
-        size, queue_iter->key, queue_iter->offset, queue_iter->size);
+        size, iterator->key, iterator->offset, iterator->size);
 
     cache_priority->updateSize(size);
-    queue_iter->size += size;
+    iterator->size += size;
 }
 
 size_t LRUFileCachePriority::LRUIterator::increasePriority(const CacheGuard::Lock &)
 {
-    checkUsable();
-    cache_priority->queue.splice(cache_priority->queue.end(), cache_priority->queue, queue_iter);
-    return ++queue_iter->hits;
+    assertValid();
+    cache_priority->queue.splice(cache_priority->queue.end(), cache_priority->queue, iterator);
+    return ++iterator->hits;
 }
 
-void LRUFileCachePriority::LRUIterator::checkUsable() const
+void LRUFileCachePriority::LRUIterator::assertValid() const
 {
-    if (queue_iter == LRUQueue::iterator{})
+    if (iterator == LRUQueue::iterator{})
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator");
 }
 
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index acc8a5a9a76..63b93de76e4 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -11,7 +11,7 @@ namespace DB
 
 /// Based on the LRU algorithm implementation, the record with the lowest priority is stored at
 /// the head of the queue, and the record with the highest priority is stored at the tail.
-class LRUFileCachePriority : public IFileCachePriority
+class LRUFileCachePriority final : public IFileCachePriority
 {
 private:
     class LRUIterator;
@@ -25,13 +25,13 @@ public:
 
     size_t getElementsCount(const CacheGuard::Lock &) const override { return current_elements_num; }
 
-    Iterator add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) override;
+    IteratorPtr add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) override;
 
     bool collectCandidatesForEviction(
         size_t size,
         FileCacheReserveStat & stat,
         EvictionCandidates & res,
-        IFileCachePriority::Iterator it,
+        IFileCachePriority::IteratorPtr reservee,
         FinalizeEvictionFunc & finalize_eviction_func,
         const CacheGuard::Lock &) override;
 
@@ -71,14 +71,14 @@ private:
     std::unique_ptr<LRUIterator> add(Entry && entry, const CacheGuard::Lock &);
 };
 
-class LRUFileCachePriority::LRUIterator : public IFileCachePriority::IIterator
+class LRUFileCachePriority::LRUIterator : public IFileCachePriority::Iterator
 {
     friend class LRUFileCachePriority;
     friend class SLRUFileCachePriority;
 public:
-    LRUIterator(LRUFileCachePriority * cache_priority_, LRUQueue::iterator queue_iter_);
+    LRUIterator(LRUFileCachePriority * cache_priority_, LRUQueue::iterator iterator_);
 
-    const Entry & getEntry() const override { return *queue_iter; }
+    const Entry & getEntry() const override { return *iterator; }
 
     size_t increasePriority(const CacheGuard::Lock &) override;
 
@@ -89,10 +89,10 @@ public:
     void updateSize(int64_t size) override;
 
 private:
-    void checkUsable() const;
+    void assertValid() const;
 
     LRUFileCachePriority * cache_priority;
-    mutable LRUQueue::iterator queue_iter;
+    mutable LRUQueue::iterator iterator;
 };
 
 }
diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h
index d68af87e8b4..c487e7a7b16 100644
--- a/src/Interpreters/Cache/Metadata.h
+++ b/src/Interpreters/Cache/Metadata.h
@@ -29,7 +29,7 @@ struct FileSegmentMetadata : private boost::noncopyable
 
     bool evicting() const { return removal_candidate.load(); }
 
-    Priority::Iterator getQueueIterator() const { return file_segment->getQueueIterator(); }
+    Priority::IteratorPtr getQueueIterator() const { return file_segment->getQueueIterator(); }
 
     FileSegmentPtr file_segment;
     std::atomic<bool> removal_candidate{false};
diff --git a/src/Interpreters/Cache/QueryLimit.cpp b/src/Interpreters/Cache/QueryLimit.cpp
index 8b5b42cca1a..109e94b2971 100644
--- a/src/Interpreters/Cache/QueryLimit.cpp
+++ b/src/Interpreters/Cache/QueryLimit.cpp
@@ -95,7 +95,7 @@ void FileCacheQueryLimit::QueryContext::remove(
     records.erase({key, offset});
 }
 
-IFileCachePriority::Iterator FileCacheQueryLimit::QueryContext::tryGet(
+IFileCachePriority::IteratorPtr FileCacheQueryLimit::QueryContext::tryGet(
     const Key & key,
     size_t offset,
     const CacheGuard::Lock &)
diff --git a/src/Interpreters/Cache/QueryLimit.h b/src/Interpreters/Cache/QueryLimit.h
index c252cd2dccc..cab1ccc63ba 100644
--- a/src/Interpreters/Cache/QueryLimit.h
+++ b/src/Interpreters/Cache/QueryLimit.h
@@ -27,7 +27,6 @@ public:
     public:
         using Key = FileCacheKey;
         using Priority = IFileCachePriority;
-        using PriorityIterator = IFileCachePriority::Iterator;
 
         QueryContext(size_t query_cache_size, bool recache_on_query_limit_exceeded_);
 
@@ -36,7 +35,7 @@ public:
 
         bool recacheOnFileCacheQueryLimitExceeded() const { return recache_on_query_limit_exceeded; }
 
-        Priority::Iterator tryGet(
+        Priority::IteratorPtr tryGet(
             const Key & key,
             size_t offset,
             const CacheGuard::Lock &);
@@ -53,7 +52,7 @@ public:
             const CacheGuard::Lock &);
 
     private:
-        using Records = std::unordered_map<FileCacheKeyAndOffset, Priority::Iterator, FileCacheKeyAndOffsetHash>;
+        using Records = std::unordered_map<FileCacheKeyAndOffset, Priority::IteratorPtr, FileCacheKeyAndOffsetHash>;
         Records records;
         LRUFileCachePriority priority;
         const bool recache_on_query_limit_exceeded;
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index ee8da79ee6f..ee311232515 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -52,38 +52,37 @@ size_t SLRUFileCachePriority::getElementsCount(const CacheGuard::Lock & lock) co
     return protected_queue.getElementsCount(lock) + probationary_queue.getElementsCount(lock);
 }
 
-IFileCachePriority::Iterator SLRUFileCachePriority::add(
+IFileCachePriority::IteratorPtr SLRUFileCachePriority::add(
     KeyMetadataPtr key_metadata,
     size_t offset,
     size_t size,
     const CacheGuard::Lock & lock)
 {
-    auto it = probationary_queue.add(Entry(key_metadata->key, offset, size, key_metadata), lock);
-    return std::make_shared<SLRUIterator>(this, std::move(it), false);
+    auto lru_iterator = probationary_queue.add(Entry(key_metadata->key, offset, size, key_metadata), lock);
+    return std::make_shared<SLRUIterator>(this, std::move(lru_iterator), false);
 }
 
 bool SLRUFileCachePriority::collectCandidatesForEviction(
     size_t size,
     FileCacheReserveStat & stat,
     EvictionCandidates & res,
-    IFileCachePriority::Iterator it,
+    IFileCachePriority::IteratorPtr reservee,
     FinalizeEvictionFunc & finalize_eviction_func,
     const CacheGuard::Lock & lock)
 {
-    /// `it` is a pointer to entry we want to evict in favour of.
     /// If `it` is nullptr, then it is the first space reservation attempt
     /// for a corresponding file segment, so it will be directly put into probationary queue.
-    if (!it)
+    if (!reservee)
     {
-        return probationary_queue.collectCandidatesForEviction(size, stat, res, it, finalize_eviction_func, lock);
+        return probationary_queue.collectCandidatesForEviction(size, stat, res, reservee, finalize_eviction_func, lock);
     }
 
     /// If `it` not nullptr (e.g. is already in some queue),
     /// we need to check in which queue (protected/probationary) it currently is
     /// (in order to know where we need to free space).
-    if (!assert_cast<SLRUIterator *>(it.get())->is_protected)
+    if (!assert_cast<SLRUIterator *>(reservee.get())->is_protected)
     {
-        return probationary_queue.collectCandidatesForEviction(size, stat, res, it, finalize_eviction_func, lock);
+        return probationary_queue.collectCandidatesForEviction(size, stat, res, reservee, finalize_eviction_func, lock);
     }
 
     /// Entry is in protected queue.
@@ -100,13 +99,13 @@ bool SLRUFileCachePriority::collectCandidatesForEviction(
     FileCacheReserveStat downgrade_stat;
     FinalizeEvictionFunc noop;
 
-    if (!protected_queue.collectCandidatesForEviction(size, downgrade_stat, *downgrade_candidates, it, noop, lock))
+    if (!protected_queue.collectCandidatesForEviction(size, downgrade_stat, *downgrade_candidates, reservee, noop, lock))
         return false;
 
     const size_t size_to_downgrade = downgrade_stat.stat.releasable_size;
 
     if (!probationary_queue.canFit(size_to_downgrade, lock)
-        && !probationary_queue.collectCandidatesForEviction(size_to_downgrade, stat, res, it, noop, lock))
+        && !probationary_queue.collectCandidatesForEviction(size_to_downgrade, stat, res, reservee, noop, lock))
         return false;
 
     finalize_eviction_func = [=, this](const CacheGuard::Lock & lk) mutable
@@ -127,75 +126,73 @@ bool SLRUFileCachePriority::collectCandidatesForEviction(
 
 void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const CacheGuard::Lock & lock)
 {
-    auto & lru_it = iterator.lru_iterator;
-    const bool is_protected = iterator.is_protected;
-    const auto & entry = lru_it->getEntry();
-
-    /// If entry (`it` is the pointer to the entry) is already in protected queue,
+    /// If entry is already in protected queue,
     /// we only need to increase its priority within the protected queue.
-    if (is_protected)
+    if (iterator.is_protected)
     {
-        lru_it->increasePriority(lock);
+        iterator.lru_iterator->increasePriority(lock);
         return;
     }
 
     /// Entry is in probationary queue.
     /// We need to move it to protected queue.
 
-    if (entry.size > protected_queue.getSizeLimit())
+    const size_t size = iterator.getEntry().size;
+    if (size > protected_queue.getSizeLimit())
     {
         /// Entry size is bigger than the whole protected queue limit.
         /// This is only possible if protected_queue_size_limit is less than max_file_segment_size,
         /// which is not possible in any realistic cache configuration.
-        lru_it->increasePriority(lock);
+        iterator.lru_iterator->increasePriority(lock);
         return;
     }
 
     /// Check if there is enough space in protected queue to move entry there.
     /// If not - we need to "downgrade" lowest priority entries from protected
     /// queue to probationary queue.
-    /// The amount of such "downgraded" entries is equal to the amount
-    /// required to make space for entry we want to insert.
     EvictionCandidates downgrade_candidates;
     FileCacheReserveStat downgrade_stat;
     FinalizeEvictionFunc noop;
 
-    if (!protected_queue.collectCandidatesForEviction(entry.size, downgrade_stat, downgrade_candidates, {}, noop, lock))
+    if (!protected_queue.collectCandidatesForEviction(size, downgrade_stat, downgrade_candidates, {}, noop, lock))
     {
         /// We cannot make space for entry to be moved to protected queue
         /// (not enough releasable file segments).
         /// Then just increase its priority within probationary queue.
-        lru_it->increasePriority(lock);
+        iterator.lru_iterator->increasePriority(lock);
         return;
     }
 
+    /// The amount of such "downgraded" entries is equal to the amount
+    /// required to make space for entry we want to insert.
+    const size_t size_to_downgrade = downgrade_stat.stat.releasable_count;
+    size_t size_to_free = 0;
+    if (size_to_downgrade && size_to_downgrade > size)
+        size_to_free = size_to_downgrade - size;
+
     /// Now we need to check if those "downgrade" candidates can actually
     /// be moved to probationary queue.
-    const size_t size_to_downgrade = downgrade_stat.stat.releasable_count;
-    size_t size_to_free = 0;
-    if (size_to_downgrade && size_to_downgrade > entry.size)
-        size_to_free = size_to_downgrade - entry.size;
-
     EvictionCandidates eviction_candidates;
     FileCacheReserveStat stat;
 
-    if (size_to_free
-        && !probationary_queue.collectCandidatesForEviction(size_to_free, stat, eviction_candidates, {}, noop, lock))
+    if (size_to_free)
     {
-        /// "downgrade" canidates cannot be moved to probationary queue,
-        /// so entry cannot be moved to protected queue as well.
-        /// Then just increase its priority within probationary queue.
-        lru_it->increasePriority(lock);
-        return;
+        if (!probationary_queue.collectCandidatesForEviction(size_to_free, stat, eviction_candidates, {}, noop, lock))
+        {
+            /// "downgrade" canidates cannot be moved to probationary queue,
+            /// so entry cannot be moved to protected queue as well.
+            /// Then just increase its priority within probationary queue.
+            iterator.lru_iterator->increasePriority(lock);
+            return;
+        }
+        /// Make space for "downgrade" candidates.
+        eviction_candidates.evict(nullptr, lock);
     }
 
-    /// Make space for "downgrade" candidates.
-    eviction_candidates.evict(nullptr, lock);
-
     /// All checks passed, now we can move downgrade candidates to
     /// probationary queue and our entry to protected queue.
-    Entry entry_copy = lru_it->getEntry();
-    lru_it->remove(lock);
+    Entry entry_copy = iterator.getEntry();
+    iterator.lru_iterator->remove(lock);
 
     for (const auto & [key, key_candidates] : downgrade_candidates)
     {
@@ -237,37 +234,37 @@ SLRUFileCachePriority::SLRUIterator::SLRUIterator(
 
 const SLRUFileCachePriority::Entry & SLRUFileCachePriority::SLRUIterator::getEntry() const
 {
-    checkUsable();
+    assertValid();
     return lru_iterator->getEntry();
 }
 
 size_t SLRUFileCachePriority::SLRUIterator::increasePriority(const CacheGuard::Lock & lock)
 {
-    checkUsable();
+    assertValid();
     cache_priority->increasePriority(*this, lock);
     return getEntry().hits;
 }
 
 void SLRUFileCachePriority::SLRUIterator::updateSize(int64_t size)
 {
-    checkUsable();
+    assertValid();
     lru_iterator->updateSize(size);
 }
 
 void SLRUFileCachePriority::SLRUIterator::invalidate()
 {
-    checkUsable();
+    assertValid();
     lru_iterator->invalidate();
 }
 
 void SLRUFileCachePriority::SLRUIterator::remove(const CacheGuard::Lock & lock)
 {
-    checkUsable();
+    assertValid();
     lru_iterator->remove(lock);
     lru_iterator = nullptr;
 }
 
-void SLRUFileCachePriority::SLRUIterator::checkUsable() const
+void SLRUFileCachePriority::SLRUIterator::assertValid() const
 {
     if (!lru_iterator)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator");
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
index f03a145c533..7b323353d70 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.h
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -32,13 +32,13 @@ public:
 
     size_t getElementsCount(const CacheGuard::Lock &) const override;
 
-    Iterator add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) override;
+    IteratorPtr add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) override;
 
     bool collectCandidatesForEviction(
         size_t size,
         FileCacheReserveStat & stat,
         EvictionCandidates & res,
-        IFileCachePriority::Iterator,
+        IFileCachePriority::IteratorPtr reservee,
         FinalizeEvictionFunc & finalize_eviction_func,
         const CacheGuard::Lock &) override;
 
@@ -54,7 +54,7 @@ private:
     void increasePriority(SLRUIterator & iterator, const CacheGuard::Lock & lock);
 };
 
-class SLRUFileCachePriority::SLRUIterator : public IFileCachePriority::IIterator
+class SLRUFileCachePriority::SLRUIterator : public IFileCachePriority::Iterator
 {
     friend class SLRUFileCachePriority;
 public:
@@ -76,7 +76,7 @@ public:
     bool isProtected() const { return is_protected; }
 
 private:
-    void checkUsable() const;
+    void assertValid() const;
 
     SLRUFileCachePriority * cache_priority;
     mutable std::unique_ptr<LRUIterator> lru_iterator;

From 0f09a69b0528f1cd36eb614dacbbb2223317ac9a Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Thu, 23 Nov 2023 15:57:32 +0100
Subject: [PATCH 025/213] Fix style check

---
 src/Interpreters/Cache/FileCache.cpp             | 6 +-----
 src/Interpreters/Cache/FileCache.h               | 5 -----
 src/Interpreters/Cache/SLRUFileCachePriority.cpp | 8 +-------
 src/Interpreters/Cache/SLRUFileCachePriority.h   | 8 --------
 4 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index c29f01745e4..2e12d6ef9bf 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -22,13 +22,8 @@ namespace fs = std::filesystem;
 namespace ProfileEvents
 {
     extern const Event FilesystemCacheLoadMetadataMicroseconds;
-    extern const Event FilesystemCacheEvictedBytes;
-    extern const Event FilesystemCacheEvictedFileSegments;
-    extern const Event FilesystemCacheEvictionSkippedFileSegments;
-    extern const Event FilesystemCacheEvictionTries;
     extern const Event FilesystemCacheLockCacheMicroseconds;
     extern const Event FilesystemCacheReserveMicroseconds;
-    extern const Event FilesystemCacheEvictMicroseconds;
     extern const Event FilesystemCacheGetOrSetMicroseconds;
     extern const Event FilesystemCacheGetMicroseconds;
 }
@@ -53,6 +48,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int BAD_ARGUMENTS;
 }
 
 FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & settings)
diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h
index 493cde9652a..097a63b0abe 100644
--- a/src/Interpreters/Cache/FileCache.h
+++ b/src/Interpreters/Cache/FileCache.h
@@ -22,11 +22,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-}
-
 /// Track acquired space in cache during reservation
 /// to make error messages when no space left more informative.
 struct FileCacheReserveStat
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index ee311232515..a9e017c62e4 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -5,13 +5,7 @@
 #include <Common/randomSeed.h>
 #include <Common/logger_useful.h>
 #include <Common/assert_cast.h>
-#include <pcg-random/pcg_random.hpp>
 
-namespace CurrentMetrics
-{
-    extern const Metric FilesystemCacheSize;
-    extern const Metric FilesystemCacheElements;
-}
 
 namespace DB
 {
@@ -179,7 +173,7 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach
     {
         if (!probationary_queue.collectCandidatesForEviction(size_to_free, stat, eviction_candidates, {}, noop, lock))
         {
-            /// "downgrade" canidates cannot be moved to probationary queue,
+            /// "downgrade" candidates cannot be moved to probationary queue,
             /// so entry cannot be moved to protected queue as well.
             /// Then just increase its priority within probationary queue.
             iterator.lru_iterator->increasePriority(lock);
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
index 7b323353d70..46e8f37819f 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.h
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -1,16 +1,8 @@
 #pragma once
 
-#include <list>
-#include <Interpreters/Cache/IFileCachePriority.h>
-#include <Interpreters/Cache/FileCacheKey.h>
 #include <Interpreters/Cache/LRUFileCachePriority.h>
 #include <Common/logger_useful.h>
-#include <Interpreters/Cache/Guards.h>
 
-namespace CurrentMetrics
-{
-    extern const Metric FilesystemCacheSizeLimit;
-}
 
 namespace DB
 {

From 07b11534bc324f73488da6d8d037dc941bead7d6 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Thu, 23 Nov 2023 16:36:18 +0100
Subject: [PATCH 026/213] Tiny refactoring

---
 src/Interpreters/Cache/FileCache.cpp          |  3 +-
 src/Interpreters/Cache/FileCacheSettings.cpp  |  4 +++
 .../Cache/LRUFileCachePriority.cpp            | 31 +++++++++++++++----
 src/Interpreters/Cache/LRUFileCachePriority.h |  9 ++++--
 .../Cache/SLRUFileCachePriority.cpp           | 30 +++++++++---------
 .../Cache/SLRUFileCachePriority.h             |  4 +--
 6 files changed, 53 insertions(+), 28 deletions(-)

diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index 2e12d6ef9bf..da996443e68 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -771,8 +771,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa
         reserve_stat.stat_by_kind.clear();
     }
 
-    /// A file_segment_metadata acquires a LRUQueue iterator on first successful space reservation attempt,
-    /// e.g. queue_iteratir is std::nullopt here if no space has been reserved yet.
+    /// A file_segment_metadata acquires a priority iterator on first successful space reservation attempt,
     auto queue_iterator = file_segment.getQueueIterator();
     chassert(!queue_iterator || file_segment.getReservedSize() > 0);
 
diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp
index 6055cec5ae5..564a0f2aacd 100644
--- a/src/Interpreters/Cache/FileCacheSettings.cpp
+++ b/src/Interpreters/Cache/FileCacheSettings.cpp
@@ -3,6 +3,7 @@
 #include <Poco/Util/AbstractConfiguration.h>
 #include <Common/Exception.h>
 #include <Common/NamedCollections/NamedCollections.h>
+#include <boost/algorithm/string/case_conv.hpp>
 #include <IO/ReadHelpers.h>
 
 namespace DB
@@ -63,7 +64,10 @@ void FileCacheSettings::loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetStrin
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `boundary_alignment` cannot exceed `max_file_segment_size`");
 
     if (has("cache_policy"))
+    {
         cache_policy = get_string("cache_policy");
+        boost::to_upper(cache_policy);
+    }
 
     if (has("slru_size_ratio"))
         slru_size_ratio = get_double("slru_size_ratio");
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index 625be890cd3..f9b0ddfce15 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -32,10 +32,10 @@ IFileCachePriority::IteratorPtr LRUFileCachePriority::add(
     size_t size,
     const CacheGuard::Lock & lock)
 {
-    return add(Entry(key_metadata->key, offset, size, key_metadata), lock);
+    return std::make_shared<LRUIterator>(add(Entry(key_metadata->key, offset, size, key_metadata), lock));
 }
 
-std::unique_ptr<LRUFileCachePriority::LRUIterator> LRUFileCachePriority::add(Entry && entry, const CacheGuard::Lock &)
+LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(Entry && entry, const CacheGuard::Lock &)
 {
     if (entry.size == 0)
     {
@@ -76,7 +76,7 @@ std::unique_ptr<LRUFileCachePriority::LRUIterator> LRUFileCachePriority::add(Ent
         log, "Added entry into LRU queue, key: {}, offset: {}, size: {}",
         entry.key, entry.offset, entry.size);
 
-    return std::make_unique<LRUIterator>(this, iterator);
+    return LRUIterator(this, iterator);
 }
 
 LRUFileCachePriority::LRUQueue::iterator LRUFileCachePriority::remove(LRUQueue::iterator it, const CacheGuard::Lock &)
@@ -115,6 +115,26 @@ LRUFileCachePriority::LRUIterator::LRUIterator(
 {
 }
 
+LRUFileCachePriority::LRUIterator::LRUIterator(const LRUIterator & other)
+{
+    *this = other;
+}
+
+LRUFileCachePriority::LRUIterator & LRUFileCachePriority::LRUIterator::operator =(const LRUIterator & other)
+{
+    if (this == &other)
+        return *this;
+
+    cache_priority = other.cache_priority;
+    iterator = other.iterator;
+    return *this;
+}
+
+bool LRUFileCachePriority::LRUIterator::operator ==(const LRUIterator & other) const
+{
+    return cache_priority == other.cache_priority && iterator == other.iterator;
+}
+
 void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock & lock)
 {
     for (auto it = queue.begin(); it != queue.end();)
@@ -223,8 +243,7 @@ bool LRUFileCachePriority::collectCandidatesForEviction(
     return can_fit();
 }
 
-std::unique_ptr<LRUFileCachePriority::LRUIterator>
-LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &)
+LRUFileCachePriority::LRUIterator LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &)
 {
     const auto & entry = it.getEntry();
     if (entry.size == 0)
@@ -254,7 +273,7 @@ LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const
 
     other.updateSize(-entry.size);
     other.updateElementsCount(-1);
-    return std::make_unique<LRUIterator>(this, it.iterator);
+    return LRUIterator(this, it.iterator);
 }
 
 FileSegments LRUFileCachePriority::dump(const CacheGuard::Lock & lock)
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index 63b93de76e4..289968602ca 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -67,17 +67,22 @@ private:
     using IterateFunc = std::function<IterationResult(LockedKey &, const FileSegmentMetadataPtr &)>;
     void iterate(IterateFunc && func, const CacheGuard::Lock &);
 
-    std::unique_ptr<LRUIterator> move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &);
-    std::unique_ptr<LRUIterator> add(Entry && entry, const CacheGuard::Lock &);
+    LRUIterator move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &);
+    LRUIterator add(Entry && entry, const CacheGuard::Lock &);
 };
 
 class LRUFileCachePriority::LRUIterator : public IFileCachePriority::Iterator
 {
     friend class LRUFileCachePriority;
     friend class SLRUFileCachePriority;
+
 public:
     LRUIterator(LRUFileCachePriority * cache_priority_, LRUQueue::iterator iterator_);
 
+    LRUIterator(const LRUIterator & other);
+    LRUIterator & operator =(const LRUIterator & other);
+    bool operator ==(const LRUIterator & other) const;
+
     const Entry & getEntry() const override { return *iterator; }
 
     size_t increasePriority(const CacheGuard::Lock &) override;
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index a9e017c62e4..dfc3686683d 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -109,7 +109,7 @@ bool SLRUFileCachePriority::collectCandidatesForEviction(
             for (const auto & candidate : key_candidates.candidates)
             {
                 auto * candidate_it = assert_cast<SLRUIterator *>(candidate->getQueueIterator().get());
-                candidate_it->lru_iterator = probationary_queue.move(*candidate_it->lru_iterator, protected_queue, lk);
+                candidate_it->lru_iterator = probationary_queue.move(candidate_it->lru_iterator, protected_queue, lk);
                 candidate_it->is_protected = false;
             }
         }
@@ -124,7 +124,7 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach
     /// we only need to increase its priority within the protected queue.
     if (iterator.is_protected)
     {
-        iterator.lru_iterator->increasePriority(lock);
+        iterator.lru_iterator.increasePriority(lock);
         return;
     }
 
@@ -137,7 +137,7 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach
         /// Entry size is bigger than the whole protected queue limit.
         /// This is only possible if protected_queue_size_limit is less than max_file_segment_size,
         /// which is not possible in any realistic cache configuration.
-        iterator.lru_iterator->increasePriority(lock);
+        iterator.lru_iterator.increasePriority(lock);
         return;
     }
 
@@ -153,7 +153,7 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach
         /// We cannot make space for entry to be moved to protected queue
         /// (not enough releasable file segments).
         /// Then just increase its priority within probationary queue.
-        iterator.lru_iterator->increasePriority(lock);
+        iterator.lru_iterator.increasePriority(lock);
         return;
     }
 
@@ -176,7 +176,7 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach
             /// "downgrade" candidates cannot be moved to probationary queue,
             /// so entry cannot be moved to protected queue as well.
             /// Then just increase its priority within probationary queue.
-            iterator.lru_iterator->increasePriority(lock);
+            iterator.lru_iterator.increasePriority(lock);
             return;
         }
         /// Make space for "downgrade" candidates.
@@ -186,14 +186,14 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach
     /// All checks passed, now we can move downgrade candidates to
     /// probationary queue and our entry to protected queue.
     Entry entry_copy = iterator.getEntry();
-    iterator.lru_iterator->remove(lock);
+    iterator.lru_iterator.remove(lock);
 
     for (const auto & [key, key_candidates] : downgrade_candidates)
     {
         for (const auto & candidate : key_candidates.candidates)
         {
             auto * candidate_it = assert_cast<SLRUIterator *>(candidate->getQueueIterator().get());
-            candidate_it->lru_iterator = probationary_queue.move(*candidate_it->lru_iterator, protected_queue, lock);
+            candidate_it->lru_iterator = probationary_queue.move(candidate_it->lru_iterator, protected_queue, lock);
             candidate_it->is_protected = false;
         }
     }
@@ -218,10 +218,10 @@ void SLRUFileCachePriority::shuffle(const CacheGuard::Lock & lock)
 
 SLRUFileCachePriority::SLRUIterator::SLRUIterator(
     SLRUFileCachePriority * cache_priority_,
-    std::unique_ptr<LRUFileCachePriority::LRUIterator> lru_iterator_,
+    LRUFileCachePriority::LRUIterator && lru_iterator_,
     bool is_protected_)
     : cache_priority(cache_priority_)
-    , lru_iterator(std::move(lru_iterator_))
+    , lru_iterator(lru_iterator_)
     , is_protected(is_protected_)
 {
 }
@@ -229,7 +229,7 @@ SLRUFileCachePriority::SLRUIterator::SLRUIterator(
 const SLRUFileCachePriority::Entry & SLRUFileCachePriority::SLRUIterator::getEntry() const
 {
     assertValid();
-    return lru_iterator->getEntry();
+    return lru_iterator.getEntry();
 }
 
 size_t SLRUFileCachePriority::SLRUIterator::increasePriority(const CacheGuard::Lock & lock)
@@ -242,26 +242,24 @@ size_t SLRUFileCachePriority::SLRUIterator::increasePriority(const CacheGuard::L
 void SLRUFileCachePriority::SLRUIterator::updateSize(int64_t size)
 {
     assertValid();
-    lru_iterator->updateSize(size);
+    lru_iterator.updateSize(size);
 }
 
 void SLRUFileCachePriority::SLRUIterator::invalidate()
 {
     assertValid();
-    lru_iterator->invalidate();
+    lru_iterator.invalidate();
 }
 
 void SLRUFileCachePriority::SLRUIterator::remove(const CacheGuard::Lock & lock)
 {
     assertValid();
-    lru_iterator->remove(lock);
-    lru_iterator = nullptr;
+    lru_iterator.remove(lock);
 }
 
 void SLRUFileCachePriority::SLRUIterator::assertValid() const
 {
-    if (!lru_iterator)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator");
+    lru_iterator.assertValid();
 }
 
 }
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
index 46e8f37819f..9dad6c15fee 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.h
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -52,7 +52,7 @@ class SLRUFileCachePriority::SLRUIterator : public IFileCachePriority::Iterator
 public:
     SLRUIterator(
         SLRUFileCachePriority * cache_priority_,
-        std::unique_ptr<LRUIterator> lru_iterator_,
+        LRUIterator && lru_iterator_,
         bool is_protected_);
 
     const Entry & getEntry() const override;
@@ -71,7 +71,7 @@ private:
     void assertValid() const;
 
     SLRUFileCachePriority * cache_priority;
-    mutable std::unique_ptr<LRUIterator> lru_iterator;
+    mutable LRUIterator lru_iterator;
     bool is_protected;
 };
 

From 69260e322e7db6e2e7d85f0eca846df89c58ce28 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Thu, 23 Nov 2023 16:53:58 +0100
Subject: [PATCH 027/213] Randomize cache policy in ci

---
 docker/test/stateful/run.sh            | 12 ++++++++++++
 docker/test/stress/run.sh              | 21 +++++++++++++++++++++
 src/Interpreters/Cache/FileCache.cpp   |  2 ++
 tests/config/config.d/storage_conf.xml |  4 +++-
 4 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh
index ad3c3477b37..829b3547856 100755
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@@ -24,6 +24,18 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
 
 config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
 
+cache_policy="SLRU"
+#TODO: uncomment this before merge, for testing purposes it is SLRU only before merge.
+#if [ $(($RANDOM%2)) -eq 1 ]; then
+#    cache_policy="LRU"
+#fi
+if [ "$cache_policy" = "SLRU" ]; then
+    sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
+    | sed "s|<cache_policy>LRU</cache_policy>|<cache_policy>SLRU</cache_policy>|" \
+    > /etc/clickhouse-server/config.d/storage_conf.xml.tmp
+    mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
+fi
+
 function start()
 {
     if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index b5092fd40df..02de3c67682 100644
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -65,9 +65,23 @@ chmod 777 -R /var/lib/clickhouse
 clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
 clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
 
+
 stop
 mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
 
+# Randomly choose between LRU and SLRU policies.
+cache_policy="SLRU"
+#TODO: uncomment this before merge, for testing purposes it is SLRU only before merge.
+#if [ $(($RANDOM%2)) -eq 1 ]; then
+#    cache_policy="LRU"
+#fi
+if [ "$cache_policy" = "SLRU" ]; then
+    sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
+    | sed "s|<cache_policy>LRU</cache_policy>|<cache_policy>SLRU</cache_policy>|" \
+    > /etc/clickhouse-server/config.d/storage_conf.xml.tmp
+    mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
+fi
+
 start
 
 clickhouse-client --query "SHOW TABLES FROM datasets"
@@ -191,6 +205,13 @@ sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \
    > /etc/clickhouse-server/config.d/logger_trace.xml.tmp
 mv /etc/clickhouse-server/config.d/logger_trace.xml.tmp /etc/clickhouse-server/config.d/logger_trace.xml
 
+if [ "$cache_policy" = "SLRU" ]; then
+    sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
+    | sed "s|<cache_policy>LRU</cache_policy>|<cache_policy>SLRU</cache_policy>|" \
+    > /etc/clickhouse-server/config.d/storage_conf.xml.tmp
+    mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
+fi
+
 start
 
 stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index da996443e68..0c818e10c9c 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -67,6 +67,8 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s
     else
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown cache policy: {}", settings.cache_policy);
 
+    LOG_DEBUG(log, "Using {} cache policy", settings.cache_policy);
+
     if (settings.cache_hits_threshold)
         stash = std::make_unique<HitsCountStash>(settings.cache_hits_threshold, settings.max_elements);
 
diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml
index d976e46ff7b..b8599ef3081 100644
--- a/tests/config/config.d/storage_conf.xml
+++ b/tests/config/config.d/storage_conf.xml
@@ -13,9 +13,11 @@
                 <type>cache</type>
                 <disk>s3_disk</disk>
                 <path>s3_cache/</path>
-                <max_size>128Mi</max_size>
+                <max_size>64Mi</max_size>
                 <cache_on_write_operations>1</cache_on_write_operations>
                 <delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
+                <cache_policy>LRU</cache_policy>
+                <slru_size_ratio>0.3</slru_size_ratio>
             </s3_cache>
             <!-- local disks -->
             <local_disk>

From 0f8d9515ec637bd346baf65d10c1f6b883d4c37c Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Thu, 23 Nov 2023 17:53:53 +0100
Subject: [PATCH 028/213] Fix style check

---
 src/Interpreters/Cache/SLRUFileCachePriority.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index dfc3686683d..02ef91c6aa2 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -10,11 +10,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
-
 namespace
 {
     size_t getRatio(size_t total, double ratio)

From 8fac59b18e15a9d2854a08ffb2b485ff81a2902e Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Fri, 24 Nov 2023 12:58:18 +0100
Subject: [PATCH 029/213] Fix cache load on server startup with slru policy

---
 src/Interpreters/Cache/FileCache.cpp          |  9 ++----
 src/Interpreters/Cache/IFileCachePriority.h   | 10 +++++-
 .../Cache/LRUFileCachePriority.cpp            |  3 +-
 src/Interpreters/Cache/LRUFileCachePriority.h | 10 ++++--
 .../Cache/SLRUFileCachePriority.cpp           | 31 +++++++++++++++++--
 .../Cache/SLRUFileCachePriority.h             |  9 +++++-
 6 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index 0c818e10c9c..f1b3d24dbc1 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -1001,9 +1001,6 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir)
         const auto key = Key::fromKeyString(key_directory.filename().string());
         auto key_metadata = metadata.getKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY, /* is_initial_load */true);
 
-        const size_t size_limit = main_priority->getSizeLimit();
-        const size_t elements_limit = main_priority->getElementsLimit();
-
         for (fs::directory_iterator offset_it{key_directory}; offset_it != fs::directory_iterator(); ++offset_it)
         {
             auto offset_with_suffix = offset_it->path().filename().string();
@@ -1044,13 +1041,13 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir)
 
             bool limits_satisfied;
             IFileCachePriority::IteratorPtr cache_it;
+
             {
                 auto lock = lockCache();
-                limits_satisfied = (size_limit == 0 || main_priority->getSize(lock) + size <= size_limit)
-                    && (elements_limit == 0 || main_priority->getElementsCount(lock) + 1 <= elements_limit);
 
+                limits_satisfied = main_priority->canFit(size, lock);
                 if (limits_satisfied)
-                    cache_it = main_priority->add(key_metadata, offset, size, lock);
+                    cache_it = main_priority->add(key_metadata, offset, size, lock, /* is_startup */true);
 
                 /// TODO: we can get rid of this lockCache() if we first load everything in parallel
                 /// without any mutual lock between loading threads, and only after do removeOverflow().
diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h
index fe5de21ec48..44fb450658b 100644
--- a/src/Interpreters/Cache/IFileCachePriority.h
+++ b/src/Interpreters/Cache/IFileCachePriority.h
@@ -59,7 +59,15 @@ public:
 
     virtual size_t getElementsCount(const CacheGuard::Lock &) const = 0;
 
-    virtual IteratorPtr add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) = 0;
+    /// Throws exception if there is not enought size to fit it.
+    virtual IteratorPtr add( /// NOLINT
+        KeyMetadataPtr key_metadata,
+        size_t offset,
+        size_t size,
+        const CacheGuard::Lock &,
+        bool is_startup = false) = 0;
+
+    virtual bool canFit(size_t size, const CacheGuard::Lock &) const = 0;
 
     virtual void shuffle(const CacheGuard::Lock &) = 0;
 
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index f9b0ddfce15..cb4da79af59 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -30,7 +30,8 @@ IFileCachePriority::IteratorPtr LRUFileCachePriority::add(
     KeyMetadataPtr key_metadata,
     size_t offset,
     size_t size,
-    const CacheGuard::Lock & lock)
+    const CacheGuard::Lock & lock,
+    bool /* is_startup */)
 {
     return std::make_shared<LRUIterator>(add(Entry(key_metadata->key, offset, size, key_metadata), lock));
 }
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index 289968602ca..31a5c527ca8 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -25,7 +25,14 @@ public:
 
     size_t getElementsCount(const CacheGuard::Lock &) const override { return current_elements_num; }
 
-    IteratorPtr add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) override;
+    bool canFit(size_t size, const CacheGuard::Lock &) const override;
+
+    IteratorPtr add( /// NOLINT
+        KeyMetadataPtr key_metadata,
+        size_t offset,
+        size_t size,
+        const CacheGuard::Lock &,
+        bool is_startup = false) override;
 
     bool collectCandidatesForEviction(
         size_t size,
@@ -53,7 +60,6 @@ private:
     /// because of invalidated entries.
     std::atomic<size_t> current_elements_num = 0;
 
-    bool canFit(size_t size, const CacheGuard::Lock &) const;
     bool canFit(size_t size, size_t released_size_assumption, size_t released_elements_assumption, const CacheGuard::Lock &) const;
 
     LRUQueue::iterator remove(LRUQueue::iterator it, const CacheGuard::Lock &);
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index 02ef91c6aa2..f2c96c4df24 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -41,14 +41,39 @@ size_t SLRUFileCachePriority::getElementsCount(const CacheGuard::Lock & lock) co
     return protected_queue.getElementsCount(lock) + probationary_queue.getElementsCount(lock);
 }
 
+bool SLRUFileCachePriority::canFit(size_t size, const CacheGuard::Lock & lock) const
+{
+    return probationary_queue.canFit(size, lock) || protected_queue.canFit(size, lock);
+}
+
 IFileCachePriority::IteratorPtr SLRUFileCachePriority::add(
     KeyMetadataPtr key_metadata,
     size_t offset,
     size_t size,
-    const CacheGuard::Lock & lock)
+    const CacheGuard::Lock & lock,
+    bool is_startup)
 {
-    auto lru_iterator = probationary_queue.add(Entry(key_metadata->key, offset, size, key_metadata), lock);
-    return std::make_shared<SLRUIterator>(this, std::move(lru_iterator), false);
+    if (is_startup)
+    {
+        /// If it is server startup, we put entries in any queue it will fit in,
+        /// but with preference for probationary queue,
+        /// because we do not know the distribution between queues after server restart.
+        if (probationary_queue.canFit(size, lock))
+        {
+            auto lru_iterator = probationary_queue.add(Entry(key_metadata->key, offset, size, key_metadata), lock);
+            return std::make_shared<SLRUIterator>(this, std::move(lru_iterator), false);
+        }
+        else
+        {
+            auto lru_iterator = protected_queue.add(Entry(key_metadata->key, offset, size, key_metadata), lock);
+            return std::make_shared<SLRUIterator>(this, std::move(lru_iterator), true);
+        }
+    }
+    else
+    {
+        auto lru_iterator = probationary_queue.add(Entry(key_metadata->key, offset, size, key_metadata), lock);
+        return std::make_shared<SLRUIterator>(this, std::move(lru_iterator), false);
+    }
 }
 
 bool SLRUFileCachePriority::collectCandidatesForEviction(
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
index 9dad6c15fee..7753f6f23b2 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.h
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -24,7 +24,14 @@ public:
 
     size_t getElementsCount(const CacheGuard::Lock &) const override;
 
-    IteratorPtr add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) override;
+    bool canFit(size_t size, const CacheGuard::Lock &) const override;
+
+    IteratorPtr add( /// NOLINT
+        KeyMetadataPtr key_metadata,
+        size_t offset,
+        size_t size,
+        const CacheGuard::Lock &,
+        bool is_startup = false) override;
 
     bool collectCandidatesForEviction(
         size_t size,

From 5c5cbc9ef507256a13f78aa8e57ea1981b0453ec Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Fri, 24 Nov 2023 15:46:45 +0100
Subject: [PATCH 030/213] Fix typo

---
 src/Interpreters/Cache/IFileCachePriority.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h
index 44fb450658b..e7a05b0e46d 100644
--- a/src/Interpreters/Cache/IFileCachePriority.h
+++ b/src/Interpreters/Cache/IFileCachePriority.h
@@ -59,7 +59,7 @@ public:
 
     virtual size_t getElementsCount(const CacheGuard::Lock &) const = 0;
 
-    /// Throws exception if there is not enought size to fit it.
+    /// Throws exception if there is not enough size to fit it.
     virtual IteratorPtr add( /// NOLINT
         KeyMetadataPtr key_metadata,
         size_t offset,

From 0f7b5ebd68d44f1789a9186082a9e8fdfa906b34 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Fri, 24 Nov 2023 18:42:56 +0100
Subject: [PATCH 031/213] Fix clang-tidy

---
 src/Interpreters/Cache/LRUFileCachePriority.cpp  | 2 +-
 src/Interpreters/Cache/SLRUFileCachePriority.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index cb4da79af59..a596d041941 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -26,7 +26,7 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
-IFileCachePriority::IteratorPtr LRUFileCachePriority::add(
+IFileCachePriority::IteratorPtr LRUFileCachePriority::add( /// NOLINT
     KeyMetadataPtr key_metadata,
     size_t offset,
     size_t size,
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index f2c96c4df24..8f7f5e4aa32 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -46,7 +46,7 @@ bool SLRUFileCachePriority::canFit(size_t size, const CacheGuard::Lock & lock) c
     return probationary_queue.canFit(size, lock) || protected_queue.canFit(size, lock);
 }
 
-IFileCachePriority::IteratorPtr SLRUFileCachePriority::add(
+IFileCachePriority::IteratorPtr SLRUFileCachePriority::add( /// NOLINT
     KeyMetadataPtr key_metadata,
     size_t offset,
     size_t size,

From ca0aa935c1d364eaf04c7fab227731d882d852a1 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Mon, 27 Nov 2023 23:56:33 +0100
Subject: [PATCH 032/213] Add Enums support to `substring` function

---
 src/DataTypes/IDataType.h                     |   2 +
 src/Functions/GatherUtils/Sources.h           |  75 +++++++++
 src/Functions/substring.cpp                   |  58 ++++---
 .../00493_substring_of_enum.reference         | 148 ++++++++++++++++++
 .../0_stateless/00493_substring_of_enum.sql   |  24 +++
 5 files changed, 285 insertions(+), 22 deletions(-)
 create mode 100644 tests/queries/0_stateless/00493_substring_of_enum.reference
 create mode 100644 tests/queries/0_stateless/00493_substring_of_enum.sql

diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h
index 4ee615f5f70..98f7e0cb06f 100644
--- a/src/DataTypes/IDataType.h
+++ b/src/DataTypes/IDataType.h
@@ -447,6 +447,8 @@ template <typename T> inline bool isNativeNumber(const T & data_type) { return W
 template <typename T> inline bool isNumber(const T & data_type) { return WhichDataType(data_type).isNumber(); }
 
 template <typename T> inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); }
+template <typename T> inline bool isEnum8(const T & data_type) { return WhichDataType(data_type).isEnum8(); }
+template <typename T> inline bool isEnum16(const T & data_type) { return WhichDataType(data_type).isEnum16(); }
 
 template <typename T> inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); }
 template <typename T> inline bool isDate32(const T & data_type) { return WhichDataType(data_type).isDate32(); }
diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h
index fad2be1f622..a3b5c21b89c 100644
--- a/src/Functions/GatherUtils/Sources.h
+++ b/src/Functions/GatherUtils/Sources.h
@@ -11,6 +11,8 @@
 #include <Common/typeid_cast.h>
 #include <Common/UTF8Helpers.h>
 
+#include <DataTypes/EnumValues.h>
+
 #include "IArraySource.h"
 #include "IValueSource.h"
 #include "Slices.h"
@@ -313,6 +315,79 @@ struct StringSource
     }
 };
 
+/// Source over an Enum8/Enum16 column: every row is presented as the bytes of its enum value's name.
+template <typename Type>
+struct EnumSource
+{
+    using Column = ColumnVector<Type>;
+    using Slice = NumericArraySlice<UInt8>;
+    using SinkType = StringSink;
+
+    const typename ColumnVector<Type>::Container & data;
+    const DataTypeEnum<Type> & data_type;
+    size_t row_num = 0;
+
+    /// Both the column data and the enum type are held by reference, not copied.
+    explicit EnumSource(const Column & col, const DataTypeEnum<Type> & data_type_) : data(col.getData()), data_type(data_type_) { }
+
+    void next() { ++row_num; }
+    bool isEnd() const { return row_num == data.size(); }
+    size_t rowNum() const { return row_num; }
+    size_t getSizeForReserve() const { return data.size(); }
+    size_t getColumnSize() const { return data.size(); }
+
+    /// Length in bytes of the current row's enum name.
+    size_t getElementSize() const { return data_type.getNameForValue(data[row_num]).size; }
+
+    Slice getWhole() const
+    {
+        StringRef name = data_type.getNameForValue(data[row_num]);
+        const UInt8 * name_data = reinterpret_cast<const UInt8 *>(name.data);
+        return {name_data, name.size};
+    }
+
+    /// The name without its first `offset` bytes; empty if `offset` reaches past the end.
+    Slice getSliceFromLeft(size_t offset) const
+    {
+        StringRef name = data_type.getNameForValue(data[row_num]);
+        if (offset >= name.size)
+            return {nullptr, 0};
+        const UInt8 * name_data = reinterpret_cast<const UInt8 *>(name.data);
+        return {name_data + offset, name.size - offset};
+    }
+
+    /// At most `length` bytes starting `offset` bytes from the beginning; empty if `offset` reaches past the end.
+    Slice getSliceFromLeft(size_t offset, size_t length) const
+    {
+        StringRef name = data_type.getNameForValue(data[row_num]);
+        if (offset >= name.size)
+            return {nullptr, 0};
+        const UInt8 * name_data = reinterpret_cast<const UInt8 *>(name.data);
+        return {name_data + offset, std::min(length, name.size - offset)};
+    }
+
+    /// The last `offset` bytes of the name; the whole name if `offset` exceeds its length.
+    Slice getSliceFromRight(size_t offset) const
+    {
+        StringRef name = data_type.getNameForValue(data[row_num]);
+        const UInt8 * name_data = reinterpret_cast<const UInt8 *>(name.data);
+        if (offset > name.size)
+            return {name_data, name.size};
+        return {name_data + name.size - offset, offset};
+    }
+
+    /// At most `length` bytes of a window that begins `offset` bytes before the end of the name.
+    /// If the window begins before the start of the name, only the part overlapping the name is returned.
+    Slice getSliceFromRight(size_t offset, size_t length) const
+    {
+        StringRef name = data_type.getNameForValue(data[row_num]);
+        const UInt8 * name_data = reinterpret_cast<const UInt8 *>(name.data);
+        if (offset > name.size)
+            return {name_data, length + name.size > offset ? std::min(name.size, length + name.size - offset) : 0};
+        return {name_data + name.size - offset, std::min(length, offset)};
+    }
+};
+
 
 /// Differs to StringSource by having 'offset' and 'length' in code points instead of bytes in getSlice* methods.
 /** NOTE: The behaviour of substring and substringUTF8 is inconsistent when negative offset is greater than string size:
diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp
index 7678692f612..f42452c9d99 100644
--- a/src/Functions/substring.cpp
+++ b/src/Functions/substring.cpp
@@ -1,4 +1,5 @@
 #include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeEnum.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnFixedString.h>
 #include <Columns/ColumnConst.h>
@@ -20,10 +21,10 @@ using namespace GatherUtils;
 
 namespace ErrorCodes
 {
-    extern const int ILLEGAL_COLUMN;
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int ZERO_ARRAY_OR_TUPLE_INDEX;
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int ZERO_ARRAY_OR_TUPLE_INDEX;
 }
 
 namespace
@@ -61,7 +62,7 @@ public:
             throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: "
                             "passed {}, should be 2 or 3", getName(), number_of_arguments);
 
-        if ((is_utf8 && !isString(arguments[0])) || !isStringOrFixedString(arguments[0]))
+        if ((is_utf8 && !isString(arguments[0])) || (!isStringOrFixedString(arguments[0]) && !isEnum(arguments[0])))
             throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
                             arguments[0]->getName(), getName());
 
@@ -124,7 +125,7 @@ public:
     {
         size_t number_of_arguments = arguments.size();
 
-        ColumnPtr column_string = arguments[0].column;
+        ColumnPtr column_arg0 = arguments[0].column;
         ColumnPtr column_start = arguments[1].column;
         ColumnPtr column_length;
 
@@ -147,33 +148,46 @@ public:
 
         if constexpr (is_utf8)
         {
-            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
+            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_arg0.get()))
                 return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
                                 length_value, UTF8StringSource(*col), input_rows_count);
-            else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
+            if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_arg0.get()))
                 return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
                                 length_value, ConstSource<UTF8StringSource>(*col_const), input_rows_count);
-            else
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
-                    arguments[0].column->getName(), getName());
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
+                arguments[0].column->getName(), getName());
         }
         else
         {
-            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
+            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_arg0.get()))
                 return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                length_value, StringSource(*col), input_rows_count);
-            else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))
+                                        length_value, StringSource(*col), input_rows_count);
+            if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_arg0.get()))
                 return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                length_value, FixedStringSource(*col_fixed), input_rows_count);
-            else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
+                                        length_value, FixedStringSource(*col_fixed), input_rows_count);
+            if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_arg0.get()))
                 return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                length_value, ConstSource<StringSource>(*col_const), input_rows_count);
-            else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
+                                        length_value, ConstSource<StringSource>(*col_const), input_rows_count);
+            if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_arg0.get()))
                 return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                length_value, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
-            else
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
-                    arguments[0].column->getName(), getName());
+                                        length_value, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
+            if (isEnum8(arguments[0].type))
+                if (const ColumnVector<Int8> * col_enum8 = checkAndGetColumn<ColumnVector<Int8>>(column_arg0.get()))
+                {
+                    const auto * enum_type = typeid_cast<const DataTypeEnum<Int8> *>(arguments[0].type.get());
+                    return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
+                                            length_value, EnumSource<Int8>(*col_enum8, *enum_type), input_rows_count);
+                }
+            if (isEnum16(arguments[0].type))
+                if (const ColumnVector<Int16> * col_enum16 = checkAndGetColumn<ColumnVector<Int16>>(column_arg0.get()))
+                {
+                    const auto * enum_type = typeid_cast<const DataTypeEnum<Int16> *>(arguments[0].type.get());
+                    return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
+                                            length_value, EnumSource<Int16>(*col_enum16, *enum_type), input_rows_count);
+                }
+
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
+                arguments[0].column->getName(), getName());
         }
     }
 };
diff --git a/tests/queries/0_stateless/00493_substring_of_enum.reference b/tests/queries/0_stateless/00493_substring_of_enum.reference
new file mode 100644
index 00000000000..427d9c9eafb
--- /dev/null
+++ b/tests/queries/0_stateless/00493_substring_of_enum.reference
@@ -0,0 +1,148 @@
+-- Positive offsets (slice from left)
+Offset: 	1	Length: 	1	hello	shark	h	s
+Offset: 	1	Length: 	2	hello	shark	he	sh
+Offset: 	1	Length: 	3	hello	shark	hel	sha
+Offset: 	1	Length: 	4	hello	shark	hell	shar
+Offset: 	1	Length: 	5	hello	shark	hello	shark
+Offset: 	1	Length: 	6	hello	shark	hello	shark
+Offset: 	2	Length: 	1	ello	hark	e	h
+Offset: 	2	Length: 	2	ello	hark	el	ha
+Offset: 	2	Length: 	3	ello	hark	ell	har
+Offset: 	2	Length: 	4	ello	hark	ello	hark
+Offset: 	2	Length: 	5	ello	hark	ello	hark
+Offset: 	2	Length: 	6	ello	hark	ello	hark
+Offset: 	3	Length: 	1	llo	ark	l	a
+Offset: 	3	Length: 	2	llo	ark	ll	ar
+Offset: 	3	Length: 	3	llo	ark	llo	ark
+Offset: 	3	Length: 	4	llo	ark	llo	ark
+Offset: 	3	Length: 	5	llo	ark	llo	ark
+Offset: 	3	Length: 	6	llo	ark	llo	ark
+Offset: 	4	Length: 	1	lo	rk	l	r
+Offset: 	4	Length: 	2	lo	rk	lo	rk
+Offset: 	4	Length: 	3	lo	rk	lo	rk
+Offset: 	4	Length: 	4	lo	rk	lo	rk
+Offset: 	4	Length: 	5	lo	rk	lo	rk
+Offset: 	4	Length: 	6	lo	rk	lo	rk
+Offset: 	5	Length: 	1	o	k	o	k
+Offset: 	5	Length: 	2	o	k	o	k
+Offset: 	5	Length: 	3	o	k	o	k
+Offset: 	5	Length: 	4	o	k	o	k
+Offset: 	5	Length: 	5	o	k	o	k
+Offset: 	5	Length: 	6	o	k	o	k
+Offset: 	6	Length: 	1				
+Offset: 	6	Length: 	2				
+Offset: 	6	Length: 	3				
+Offset: 	6	Length: 	4				
+Offset: 	6	Length: 	5				
+Offset: 	6	Length: 	6				
+Offset: 	1	Length: 	1	world	eagle	w	e
+Offset: 	1	Length: 	2	world	eagle	wo	ea
+Offset: 	1	Length: 	3	world	eagle	wor	eag
+Offset: 	1	Length: 	4	world	eagle	worl	eagl
+Offset: 	1	Length: 	5	world	eagle	world	eagle
+Offset: 	1	Length: 	6	world	eagle	world	eagle
+Offset: 	2	Length: 	1	orld	agle	o	a
+Offset: 	2	Length: 	2	orld	agle	or	ag
+Offset: 	2	Length: 	3	orld	agle	orl	agl
+Offset: 	2	Length: 	4	orld	agle	orld	agle
+Offset: 	2	Length: 	5	orld	agle	orld	agle
+Offset: 	2	Length: 	6	orld	agle	orld	agle
+Offset: 	3	Length: 	1	rld	gle	r	g
+Offset: 	3	Length: 	2	rld	gle	rl	gl
+Offset: 	3	Length: 	3	rld	gle	rld	gle
+Offset: 	3	Length: 	4	rld	gle	rld	gle
+Offset: 	3	Length: 	5	rld	gle	rld	gle
+Offset: 	3	Length: 	6	rld	gle	rld	gle
+Offset: 	4	Length: 	1	ld	le	l	l
+Offset: 	4	Length: 	2	ld	le	ld	le
+Offset: 	4	Length: 	3	ld	le	ld	le
+Offset: 	4	Length: 	4	ld	le	ld	le
+Offset: 	4	Length: 	5	ld	le	ld	le
+Offset: 	4	Length: 	6	ld	le	ld	le
+Offset: 	5	Length: 	1	d	e	d	e
+Offset: 	5	Length: 	2	d	e	d	e
+Offset: 	5	Length: 	3	d	e	d	e
+Offset: 	5	Length: 	4	d	e	d	e
+Offset: 	5	Length: 	5	d	e	d	e
+Offset: 	5	Length: 	6	d	e	d	e
+Offset: 	6	Length: 	1				
+Offset: 	6	Length: 	2				
+Offset: 	6	Length: 	3				
+Offset: 	6	Length: 	4				
+Offset: 	6	Length: 	5				
+Offset: 	6	Length: 	6				
+-- Negative offsets (slice from right)
+Offset: 	-1	Length: 	1	o	k	o	k
+Offset: 	-1	Length: 	2	o	k	o	k
+Offset: 	-1	Length: 	3	o	k	o	k
+Offset: 	-1	Length: 	4	o	k	o	k
+Offset: 	-1	Length: 	5	o	k	o	k
+Offset: 	-1	Length: 	6	o	k	o	k
+Offset: 	-2	Length: 	1	lo	rk	l	r
+Offset: 	-2	Length: 	2	lo	rk	lo	rk
+Offset: 	-2	Length: 	3	lo	rk	lo	rk
+Offset: 	-2	Length: 	4	lo	rk	lo	rk
+Offset: 	-2	Length: 	5	lo	rk	lo	rk
+Offset: 	-2	Length: 	6	lo	rk	lo	rk
+Offset: 	-3	Length: 	1	llo	ark	l	a
+Offset: 	-3	Length: 	2	llo	ark	ll	ar
+Offset: 	-3	Length: 	3	llo	ark	llo	ark
+Offset: 	-3	Length: 	4	llo	ark	llo	ark
+Offset: 	-3	Length: 	5	llo	ark	llo	ark
+Offset: 	-3	Length: 	6	llo	ark	llo	ark
+Offset: 	-4	Length: 	1	ello	hark	e	h
+Offset: 	-4	Length: 	2	ello	hark	el	ha
+Offset: 	-4	Length: 	3	ello	hark	ell	har
+Offset: 	-4	Length: 	4	ello	hark	ello	hark
+Offset: 	-4	Length: 	5	ello	hark	ello	hark
+Offset: 	-4	Length: 	6	ello	hark	ello	hark
+Offset: 	-5	Length: 	1	hello	shark	h	s
+Offset: 	-5	Length: 	2	hello	shark	he	sh
+Offset: 	-5	Length: 	3	hello	shark	hel	sha
+Offset: 	-5	Length: 	4	hello	shark	hell	shar
+Offset: 	-5	Length: 	5	hello	shark	hello	shark
+Offset: 	-5	Length: 	6	hello	shark	hello	shark
+Offset: 	-6	Length: 	1	hello	shark		
+Offset: 	-6	Length: 	2	hello	shark	h	s
+Offset: 	-6	Length: 	3	hello	shark	he	sh
+Offset: 	-6	Length: 	4	hello	shark	hel	sha
+Offset: 	-6	Length: 	5	hello	shark	hell	shar
+Offset: 	-6	Length: 	6	hello	shark	hello	shark
+Offset: 	-1	Length: 	1	d	e	d	e
+Offset: 	-1	Length: 	2	d	e	d	e
+Offset: 	-1	Length: 	3	d	e	d	e
+Offset: 	-1	Length: 	4	d	e	d	e
+Offset: 	-1	Length: 	5	d	e	d	e
+Offset: 	-1	Length: 	6	d	e	d	e
+Offset: 	-2	Length: 	1	ld	le	l	l
+Offset: 	-2	Length: 	2	ld	le	ld	le
+Offset: 	-2	Length: 	3	ld	le	ld	le
+Offset: 	-2	Length: 	4	ld	le	ld	le
+Offset: 	-2	Length: 	5	ld	le	ld	le
+Offset: 	-2	Length: 	6	ld	le	ld	le
+Offset: 	-3	Length: 	1	rld	gle	r	g
+Offset: 	-3	Length: 	2	rld	gle	rl	gl
+Offset: 	-3	Length: 	3	rld	gle	rld	gle
+Offset: 	-3	Length: 	4	rld	gle	rld	gle
+Offset: 	-3	Length: 	5	rld	gle	rld	gle
+Offset: 	-3	Length: 	6	rld	gle	rld	gle
+Offset: 	-4	Length: 	1	orld	agle	o	a
+Offset: 	-4	Length: 	2	orld	agle	or	ag
+Offset: 	-4	Length: 	3	orld	agle	orl	agl
+Offset: 	-4	Length: 	4	orld	agle	orld	agle
+Offset: 	-4	Length: 	5	orld	agle	orld	agle
+Offset: 	-4	Length: 	6	orld	agle	orld	agle
+Offset: 	-5	Length: 	1	world	eagle	w	e
+Offset: 	-5	Length: 	2	world	eagle	wo	ea
+Offset: 	-5	Length: 	3	world	eagle	wor	eag
+Offset: 	-5	Length: 	4	world	eagle	worl	eagl
+Offset: 	-5	Length: 	5	world	eagle	world	eagle
+Offset: 	-5	Length: 	6	world	eagle	world	eagle
+Offset: 	-6	Length: 	1	world	eagle		
+Offset: 	-6	Length: 	2	world	eagle	w	e
+Offset: 	-6	Length: 	3	world	eagle	wo	ea
+Offset: 	-6	Length: 	4	world	eagle	wor	eag
+Offset: 	-6	Length: 	5	world	eagle	worl	eagl
+Offset: 	-6	Length: 	6	world	eagle	world	eagle
+-- Constant enums
+f	fo
diff --git a/tests/queries/0_stateless/00493_substring_of_enum.sql b/tests/queries/0_stateless/00493_substring_of_enum.sql
new file mode 100644
index 00000000000..39d0014bde9
--- /dev/null
+++ b/tests/queries/0_stateless/00493_substring_of_enum.sql
@@ -0,0 +1,24 @@
+DROP TABLE IF EXISTS substring_enums_test;
+CREATE TABLE substring_enums_test(e8 Enum('hello' = -5, 'world' = 15), e16 Enum('shark' = -999, 'eagle' = 9999)) ENGINE MergeTree ORDER BY tuple();
+INSERT INTO TABLE substring_enums_test VALUES ('hello', 'shark'), ('world', 'eagle');
+
+SELECT '-- Positive offsets (slice from left)';
+WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6),
+     permutations AS (SELECT c1.n AS offset, c2.n AS length FROM cte AS c1 CROSS JOIN cte AS c2)
+SELECT 'Offset: ', p.offset, 'Length: ', p.length,
+       substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2,
+       substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4
+FROM substring_enums_test LEFT JOIN permutations AS p ON true;
+
+SELECT '-- Negative offsets (slice from right)';
+WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6),
+     permutations AS (SELECT -c1.n AS offset, c2.n AS length FROM cte AS c1 CROSS JOIN cte AS c2)
+SELECT 'Offset: ', p.offset, 'Length: ', p.length,
+       substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2,
+       substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4
+FROM substring_enums_test LEFT JOIN permutations AS p ON true;
+
+SELECT '-- Constant enums';
+SELECT substring(CAST('foo', 'Enum8(\'foo\' = 1)'), 1, 1), substring(CAST('foo', 'Enum16(\'foo\' = 1111)'), 1, 2);
+
+DROP TABLE substring_enums_test;

From 497e5abc4793a8c486192593fc48c0b67db4d483 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Tue, 28 Nov 2023 00:09:36 +0100
Subject: [PATCH 033/213] Add a doc entry.

---
 docs/en/sql-reference/functions/string-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 1940993ce0b..8ecd5af9258 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -579,7 +579,7 @@ A function is called injective if it returns for different arguments different r
 
 ## substring(s, offset, length)
 
-Returns a substring with `length` many bytes, starting at the byte at index `offset`. Character indexing starts from 1.
+Returns a substring with `length` many bytes, starting at the byte at index `offset`. Character indexing starts from 1. Can be also used with [Enum](../../sql-reference/data-types/enum.md) types.
 
 **Syntax**
 

From ff6bdfe8576b632b68700a8b8b220602fbc6b041 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Tue, 28 Nov 2023 00:43:29 +0100
Subject: [PATCH 034/213] Add more substring with enums tests

---
 .../queries/0_stateless/00493_substring_of_enum.reference | 7 +++++++
 tests/queries/0_stateless/00493_substring_of_enum.sql     | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/tests/queries/0_stateless/00493_substring_of_enum.reference b/tests/queries/0_stateless/00493_substring_of_enum.reference
index 427d9c9eafb..17bc960b318 100644
--- a/tests/queries/0_stateless/00493_substring_of_enum.reference
+++ b/tests/queries/0_stateless/00493_substring_of_enum.reference
@@ -144,5 +144,12 @@ Offset: 	-6	Length: 	3	world	eagle	wo	ea
 Offset: 	-6	Length: 	4	world	eagle	wor	eag
 Offset: 	-6	Length: 	5	world	eagle	worl	eagl
 Offset: 	-6	Length: 	6	world	eagle	world	eagle
+-- Zero offset/length
+Offset: 	0	Length: 	0				
+Offset: 	0	Length: 	1				
+Offset: 	1	Length: 	0	hello	shark		
+Offset: 	0	Length: 	0				
+Offset: 	0	Length: 	1				
+Offset: 	1	Length: 	0	world	eagle		
 -- Constant enums
 f	fo
diff --git a/tests/queries/0_stateless/00493_substring_of_enum.sql b/tests/queries/0_stateless/00493_substring_of_enum.sql
index 39d0014bde9..ba9fc630490 100644
--- a/tests/queries/0_stateless/00493_substring_of_enum.sql
+++ b/tests/queries/0_stateless/00493_substring_of_enum.sql
@@ -18,6 +18,14 @@ SELECT 'Offset: ', p.offset, 'Length: ', p.length,
        substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4
 FROM substring_enums_test LEFT JOIN permutations AS p ON true;
 
+SELECT '-- Zero offset/length';
+WITH cte AS (SELECT number AS n FROM system.numbers LIMIT 2),
+     permutations AS (SELECT c1.n AS offset, c2.n AS length FROM cte AS c1 CROSS JOIN cte AS c2 LIMIT 3)
+SELECT 'Offset: ', p.offset, 'Length: ', p.length,
+       substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2,
+       substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4
+FROM substring_enums_test LEFT JOIN permutations AS p ON true;
+
 SELECT '-- Constant enums';
 SELECT substring(CAST('foo', 'Enum8(\'foo\' = 1)'), 1, 1), substring(CAST('foo', 'Enum16(\'foo\' = 1111)'), 1, 2);
 

From 98a03ac36c62782116b390a09bcf75b30251eea1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Tue, 28 Nov 2023 18:28:29 +0800
Subject: [PATCH 035/213] support table

---
 src/Common/CurrentMetrics.cpp     |  2 ++
 src/Databases/DatabaseLazy.cpp    |  1 +
 src/Databases/DatabaseOnDisk.cpp  |  1 +
 src/Databases/DatabasesCommon.cpp | 10 ++++++++++
 4 files changed, 14 insertions(+)

diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index 5a4b6e80f75..c592ae75150 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -212,6 +212,8 @@
     M(PartsCommitted, "Deprecated. See PartsActive.") \
     M(PartsPreActive, "The part is in data_parts, but not used for SELECTs.") \
     M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \
+    M(AttachedDatabase, "Number of databases that are currently attached.") \
+    M(AttachedTable, "Number of tables that are currently attached.") \
     M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \
     M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \
     M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \
diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp
index 896ae99656f..9070c0b6ee7 100644
--- a/src/Databases/DatabaseLazy.cpp
+++ b/src/Databases/DatabaseLazy.cpp
@@ -64,6 +64,7 @@ void DatabaseLazy::createTable(
     SCOPE_EXIT_MEMORY_SAFE({ clearExpiredTables(); });
     if (!endsWith(table->getName(), "Log"))
         throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lazy engine can be used only with *Log tables.");
+    /// Only *Log tables get this far; the actual creation is delegated to DatabaseOnDisk.
     DatabaseOnDisk::createTable(local_context, table_name, table, query);
 
     /// DatabaseOnDisk::createTable renames file, so we need to get new metadata_modification_time.
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 549711150b8..f1fc6bbdd7a 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -263,6 +263,7 @@ void DatabaseOnDisk::commitCreateTable(const ASTCreateQuery & query, const Stora
     try
     {
         /// Add a table to the map of known tables.
+        /// The data path passed along is derived from the create query.
         attachTable(query_context, query.getTable(), table, getTableDataPath(query));
 
         /// If it was ATTACH query and file with table metadata already exist
diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp
index 9b85e7194d3..2bdca2398b5 100644
--- a/src/Databases/DatabasesCommon.cpp
+++ b/src/Databases/DatabasesCommon.cpp
@@ -8,11 +8,17 @@
 #include <Storages/StorageDictionary.h>
 #include <Storages/StorageFactory.h>
 #include <Common/typeid_cast.h>
+#include <Common/CurrentMetrics.h>
 #include <Common/escapeForFileName.h>
 #include <TableFunctions/TableFunctionFactory.h>
 #include <Backups/BackupEntriesCollector.h>
 #include <Backups/RestorerFromBackup.h>
 
+/// Counter that is incremented when a table is attached and decremented when it is detached.
+namespace CurrentMetrics
+{
+    extern const Metric AttachedTable;
+}
 
 namespace DB
 {
@@ -243,6 +249,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n
     res = it->second;
     tables.erase(it);
     res->is_detached = true;
+    CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1);
 
     auto table_id = res->getStorageID();
     if (table_id.hasUUID())
@@ -256,12 +263,14 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n
 
 void DatabaseWithOwnTablesBase::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &)
 {
+    /// Take `mutex`, then delegate to attachTableUnlocked().
     std::lock_guard lock(mutex);
     attachTableUnlocked(table_name, table);
 }
 
 void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, const StoragePtr & table)
 {
+    std::cout<<"========= Flag 6"<<std::endl;
     auto table_id = table->getStorageID();
     if (table_id.database_name != database_name)
         throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed to `{}`, cannot create table in `{}`",
@@ -283,6 +292,7 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c
     /// It is important to reset is_detached here since in case of RENAME in
     /// non-Atomic database the is_detached is set to true before RENAME.
     table->is_detached = false;
+    CurrentMetrics::add(CurrentMetrics::AttachedTable, 1);
 }
 
 void DatabaseWithOwnTablesBase::shutdown()

From ade60b5b45283adddfcf166d5a5c865c7a4da16f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Tue, 28 Nov 2023 14:12:26 +0100
Subject: [PATCH 036/213] Randomize disabled optimizations in CI

---
 tests/clickhouse-test | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 115e5ac7ba3..bd2f2ca1d5d 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -555,8 +555,15 @@ class SettingsRandomizer:
         "prefer_localhost_replica": lambda: random.randint(0, 1),
         "max_block_size": lambda: random.randint(8000, 100000),
         "max_threads": lambda: random.randint(1, 64),
-        "optimize_or_like_chain": lambda: random.randint(0, 1),
+        "optimize_append_index": lambda: random.randint(0, 1),
+        "optimize_functions_to_subcolumns": lambda: random.randint(0, 1),
+        "optimize_syntax_fuse_functions": lambda: random.randint(0, 1),
+        "optimize_if_chain_to_multiif": lambda: random.randint(0, 1),
+        "optimize_if_transform_strings_to_enum": lambda: random.randint(0, 1),
         "optimize_read_in_order": lambda: random.randint(0, 1),
+        "optimize_or_like_chain": lambda: random.randint(0, 1),
+        "optimize_substitute_columns": lambda: random.randint(0, 1),
+        "optimize_using_constraints": lambda: random.randint(0, 1),
         "enable_multiple_prewhere_read_steps": lambda: random.randint(0, 1),
         "read_in_order_two_level_merge_threshold": lambda: random.randint(0, 100),
         "optimize_aggregation_in_order": lambda: random.randint(0, 1),

From b56b48d2de0aced8c40c3e00591d3e0c8448812f Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 28 Nov 2023 14:59:20 +0000
Subject: [PATCH 037/213] Update docs

---
 .../functions/string-functions.md             | 36 ++++++++++++++++---
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 8ecd5af9258..baf08e18f11 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -577,26 +577,52 @@ Like `concatWithSeparator` but assumes that `concatWithSeparator(sep, expr1, exp
 
 A function is called injective if it returns for different arguments different results. In other words: different arguments never produce identical result.
 
-## substring(s, offset, length)
+## substring
 
-Returns a substring with `length` many bytes, starting at the byte at index `offset`. Character indexing starts from 1. Can be also used with [Enum](../../sql-reference/data-types/enum.md) types.
+Returns the substring of a string `s` which starts at the specified byte index `offset`. Byte counting starts from 1. If `offset` is 0, an empty string is returned. If `offset` is negative, the substring starts `offset` bytes from the end of the string, rather than from the beginning. An optional argument `length` specifies the maximum number of bytes the returned substring may have.
 
 **Syntax**
 
 ```sql
-substring(s, offset, length)
+substring(s, offset[, length])
 ```
 
 Alias:
 - `substr`
 - `mid`
 
+**Arguments**
+
+- `s` — The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md).
+- `offset` — The starting position of the substring in `s`. [(U)Int*](../../sql-reference/data-types/int-uint.md).
+- `length` — The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional.
+
+**Returned value**
+
+A substring of `s` with `length` many bytes, starting at index `offset`.
+
+Type: `String`.
+
+**Example**
+
+``` sql
+SELECT 'database' AS db, substr(db, 5), substr(db, 5, 1)
+```
+
+Result:
+
+```result
+┌─db───────┬─substring('database', 5)─┬─substring('database', 5, 1)─┐
+│ database │ base                     │ b                           │
+└──────────┴──────────────────────────┴─────────────────────────────┘
+```
+
 ## substringUTF8
 
 Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
 
 
-## substringIndex(s, delim, count)
+## substringIndex
 
 Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL.
 
@@ -627,7 +653,7 @@ Result:
 └──────────────────────────────────────────────┘
 ```
 
-## substringIndexUTF8(s, delim, count)
+## substringIndexUTF8
 
 Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
 

From 025fcd3aa4ca3867c1c3510adb4804d7b85076c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Tue, 28 Nov 2023 18:22:49 +0100
Subject: [PATCH 038/213] Enable the settings to do a full run with them
 enabled

---
 tests/clickhouse-test | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index bd2f2ca1d5d..dc6614471a4 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -555,15 +555,15 @@ class SettingsRandomizer:
         "prefer_localhost_replica": lambda: random.randint(0, 1),
         "max_block_size": lambda: random.randint(8000, 100000),
         "max_threads": lambda: random.randint(1, 64),
-        "optimize_append_index": lambda: random.randint(0, 1),
-        "optimize_functions_to_subcolumns": lambda: random.randint(0, 1),
-        "optimize_syntax_fuse_functions": lambda: random.randint(0, 1),
-        "optimize_if_chain_to_multiif": lambda: random.randint(0, 1),
-        "optimize_if_transform_strings_to_enum": lambda: random.randint(0, 1),
+        "optimize_append_index": lambda: random.randint(1, 1),
+        "optimize_functions_to_subcolumns": lambda: random.randint(1, 1),
+        "optimize_syntax_fuse_functions": lambda: random.randint(1, 1),
+        "optimize_if_chain_to_multiif": lambda: random.randint(1, 1),
+        "optimize_if_transform_strings_to_enum": lambda: random.randint(1, 1),
         "optimize_read_in_order": lambda: random.randint(0, 1),
-        "optimize_or_like_chain": lambda: random.randint(0, 1),
-        "optimize_substitute_columns": lambda: random.randint(0, 1),
-        "optimize_using_constraints": lambda: random.randint(0, 1),
+        "optimize_or_like_chain": lambda: random.randint(1, 1),
+        "optimize_substitute_columns": lambda: random.randint(1, 1),
+        "optimize_using_constraints": lambda: random.randint(1, 1),
         "enable_multiple_prewhere_read_steps": lambda: random.randint(0, 1),
         "read_in_order_two_level_merge_threshold": lambda: random.randint(0, 100),
         "optimize_aggregation_in_order": lambda: random.randint(0, 1),

From 157555a45bcd11949a496603b224b5f07883fad8 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 28 Nov 2023 16:59:12 +0000
Subject: [PATCH 039/213] Some fixups and consistency fixes

---
 src/DataTypes/IDataType.h                     |   2 +-
 src/Functions/GatherUtils/Sources.h           | 113 +++++++-----
 src/Functions/substring.cpp                   | 161 ++++++++----------
 .../0_stateless/00493_substring_of_enum.sql   |  14 +-
 4 files changed, 150 insertions(+), 140 deletions(-)

diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h
index 98f7e0cb06f..e287b5879a2 100644
--- a/src/DataTypes/IDataType.h
+++ b/src/DataTypes/IDataType.h
@@ -446,9 +446,9 @@ template <typename T> inline bool isFloat(const T & data_type) { return WhichDat
 template <typename T> inline bool isNativeNumber(const T & data_type) { return WhichDataType(data_type).isNativeNumber(); }
 template <typename T> inline bool isNumber(const T & data_type) { return WhichDataType(data_type).isNumber(); }
 
-template <typename T> inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); }
 template <typename T> inline bool isEnum8(const T & data_type) { return WhichDataType(data_type).isEnum8(); }
 template <typename T> inline bool isEnum16(const T & data_type) { return WhichDataType(data_type).isEnum16(); }
+template <typename T> inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); }
 
 template <typename T> inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); }
 template <typename T> inline bool isDate32(const T & data_type) { return WhichDataType(data_type).isDate32(); }
diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h
index a3b5c21b89c..329f71203bf 100644
--- a/src/Functions/GatherUtils/Sources.h
+++ b/src/Functions/GatherUtils/Sources.h
@@ -58,8 +58,8 @@ struct NumericArraySource : public ArraySourceImpl<NumericArraySource<T>>
     }
 
     explicit NumericArraySource(const ColumnArray & arr)
-            : column(typeid_cast<const ColVecType &>(arr.getData()))
-            , elements(typeid_cast<const ColVecType &>(arr.getData()).getData()), offsets(arr.getOffsets())
+        : column(typeid_cast<const ColVecType &>(arr.getData()))
+        , elements(typeid_cast<const ColVecType &>(arr.getData()).getData()), offsets(arr.getOffsets())
     {
     }
 
@@ -156,17 +156,22 @@ struct ConstSource : public Base
     size_t row_num = 0;
 
     explicit ConstSource(const ColumnConst & col_)
-            : Base(static_cast<const typename Base::Column &>(col_.getDataColumn())), total_rows(col_.size())
+        : Base(static_cast<const typename Base::Column &>(col_.getDataColumn()))
+        , total_rows(col_.size())
     {
     }
 
     template <typename ColumnType>
-    ConstSource(const ColumnType & col_, size_t total_rows_) : Base(col_), total_rows(total_rows_)
+    ConstSource(const ColumnType & col_, size_t total_rows_)
+        : Base(col_)
+        , total_rows(total_rows_)
     {
     }
 
     template <typename ColumnType>
-    ConstSource(const ColumnType & col_, const NullMap & null_map_, size_t total_rows_) : Base(col_, null_map_), total_rows(total_rows_)
+    ConstSource(const ColumnType & col_, const NullMap & null_map_, size_t total_rows_)
+        : Base(col_, null_map_)
+        , total_rows(total_rows_)
     {
     }
 
@@ -242,7 +247,8 @@ struct StringSource
     ColumnString::Offset prev_offset = 0;
 
     explicit StringSource(const ColumnString & col)
-            : elements(col.getChars()), offsets(col.getOffsets())
+        : elements(col.getChars())
+        , offsets(col.getOffsets())
     {
     }
 
@@ -315,76 +321,91 @@ struct StringSource
     }
 };
 
-template <typename Type>
+/// Treats Enum values as Strings, modeled after StringSource
+template <typename EnumDataType>
 struct EnumSource {
-    using Column = ColumnVector<Type>;
+    using Column = typename EnumDataType::ColumnType;
     using Slice = NumericArraySlice<UInt8>;
 
     using SinkType = StringSink;
 
-    const typename ColumnVector<Type>::Container & data;
-    const DataTypeEnum<Type> & data_type;
+    const typename Column::Container & data;
+    const EnumDataType & data_type;
 
     size_t row_num = 0;
 
-    explicit EnumSource(const Column & col, const DataTypeEnum<Type> & data_type_) : data(col.getData()), data_type(data_type_) { }
+    EnumSource(const Column & col, const EnumDataType & data_type_)
+        : data(col.getData())
+        , data_type(data_type_)
+    {
+    }
 
-    void next() { ++row_num; }
+    void next()
+    {
+        ++row_num;
+    }
 
-    bool isEnd() const { return row_num == data.size(); }
+    bool isEnd() const
+    {
+        return row_num == data.size();
+    }
 
-    size_t rowNum() const { return row_num; }
+    size_t rowNum() const
+    {
+        return row_num;
+    }
 
-    size_t getSizeForReserve() const { return data.size(); }
+    size_t getSizeForReserve() const
+    {
+        return data.size();
+    }
 
     size_t getElementSize() const
     {
-        StringRef name = data_type.getNameForValue(data[row_num]);
-        return name.size;
+        std::string_view name = data_type.getNameForValue(data[row_num]).toView();
+        return name.size();
     }
 
-    size_t getColumnSize() const { return data.size(); }
+    size_t getColumnSize() const
+    {
+        return data.size();
+    }
 
     Slice getWhole() const {
-        StringRef name = data_type.getNameForValue(data[row_num]);
-        const UInt8 * name_data = reinterpret_cast<const UInt8 *>(name.data);
-        return {name_data, name.size};
+        std::string_view name = data_type.getNameForValue(data[row_num]).toView();
+        return {reinterpret_cast<const UInt8 *>(name.data()), name.size()};
     }
 
     Slice getSliceFromLeft(size_t offset) const
     {
-        StringRef name = data_type.getNameForValue(data[row_num]);
-        if (offset >= name.size)
-            return {nullptr, 0};
-        const UInt8 * name_data = reinterpret_cast<const UInt8 *>(name.data);
-        return {name_data + offset, name.size - offset};
+        std::string_view name = data_type.getNameForValue(data[row_num]).toView();
+        if (offset >= name.size())
+            return {reinterpret_cast<const UInt8 *>(name.data()), 0};
+        return {reinterpret_cast<const UInt8 *>(name.data()) + offset, name.size() - offset};
     }
 
     Slice getSliceFromLeft(size_t offset, size_t length) const
     {
-        StringRef name = data_type.getNameForValue(data[row_num]);
-        if (offset >= name.size)
-            return {nullptr, 0};
-        const UInt8 * name_data = reinterpret_cast<const UInt8 *>(name.data);
-        return {name_data + offset, std::min(length, name.size - offset)};
+        std::string_view name = data_type.getNameForValue(data[row_num]).toView();
+        if (offset >= name.size())
+            return {reinterpret_cast<const UInt8 *>(name.data()), 0};
+        return {reinterpret_cast<const UInt8 *>(name.data()) + offset, std::min(length, name.size() - offset)};
     }
 
     Slice getSliceFromRight(size_t offset) const
     {
-        StringRef name = data_type.getNameForValue(data[row_num]);
-        const UInt8 * name_data = reinterpret_cast<const UInt8 *>(name.data);
-        if (offset > name.size)
-            return {name_data, name.size};
-        return {name_data + name.size - offset, offset};
+        std::string_view name = data_type.getNameForValue(data[row_num]).toView();
+        if (offset > name.size())
+            return {reinterpret_cast<const UInt8 *>(name.data()), name.size()};
+        return {reinterpret_cast<const UInt8 *>(name.data()) + name.size() - offset, offset};
     }
 
     Slice getSliceFromRight(size_t offset, size_t length) const
     {
-        StringRef name = data_type.getNameForValue(data[row_num]);
-        const UInt8 * name_data = reinterpret_cast<const UInt8 *>(name.data);
-        if (offset > name.size)
-            return {name_data, length + name.size > offset ? std::min(name.size, length + name.size - offset) : 0};
-        return {name_data + name.size - offset, std::min(length, offset)};
+        std::string_view name = data_type.getNameForValue(data[row_num]).toView();
+        if (offset > name.size())
+            return {reinterpret_cast<const UInt8 *>(name.data()), length + name.size() > offset ? std::min(name.size(), length + name.size() - offset) : 0};
+        return {reinterpret_cast<const UInt8 *>(name.data()) + name.size() - offset, std::min(length, offset)};
     }
 };
 
@@ -494,7 +515,7 @@ struct FixedStringSource
     size_t column_size = 0;
 
     explicit FixedStringSource(const ColumnFixedString & col)
-            : string_size(col.getN())
+        : string_size(col.getN())
     {
         const auto & chars = col.getChars();
         pos = chars.data();
@@ -628,7 +649,8 @@ struct GenericArraySource : public ArraySourceImpl<GenericArraySource>
     }
 
     explicit GenericArraySource(const ColumnArray & arr)
-            : elements(arr.getData()), offsets(arr.getOffsets())
+        : elements(arr.getData())
+        , offsets(arr.getOffsets())
     {
     }
 
@@ -888,7 +910,10 @@ struct NullableValueSource : public ValueSource
     const NullMap & null_map;
 
     template <typename Column>
-    explicit NullableValueSource(const Column & col, const NullMap & null_map_) : ValueSource(col), null_map(null_map_) {}
+    NullableValueSource(const Column & col, const NullMap & null_map_)
+        : ValueSource(col)
+        , null_map(null_map_)
+    {}
 
     void accept(ValueSourceVisitor & visitor) override { visitor.visit(*this); }
 
diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp
index f42452c9d99..ac6a24fbc11 100644
--- a/src/Functions/substring.cpp
+++ b/src/Functions/substring.cpp
@@ -1,16 +1,16 @@
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypeEnum.h>
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnFixedString.h>
 #include <Columns/ColumnConst.h>
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnString.h>
+#include <DataTypes/DataTypeEnum.h>
+#include <DataTypes/DataTypeString.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionHelpers.h>
-#include <Functions/IFunction.h>
+#include <Functions/GatherUtils/Algorithms.h>
 #include <Functions/GatherUtils/GatherUtils.h>
-#include <Functions/GatherUtils/Sources.h>
 #include <Functions/GatherUtils/Sinks.h>
 #include <Functions/GatherUtils/Slices.h>
-#include <Functions/GatherUtils/Algorithms.h>
+#include <Functions/GatherUtils/Sources.h>
+#include <Functions/IFunction.h>
 #include <IO/WriteHelpers.h>
 
 
@@ -31,40 +31,40 @@ namespace
 {
 
 /// If 'is_utf8' - measure offset and length in code points instead of bytes.
-/// UTF8 variant is not available for FixedString arguments.
 template <bool is_utf8>
 class FunctionSubstring : public IFunction
 {
 public:
     static constexpr auto name = is_utf8 ? "substringUTF8" : "substring";
-    static FunctionPtr create(ContextPtr)
-    {
-        return std::make_shared<FunctionSubstring>();
-    }
-
-    String getName() const override
-    {
-        return name;
-    }
 
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSubstring>(); }
+    String getName() const override { return name; }
     bool isVariadic() const override { return true; }
     size_t getNumberOfArguments() const override { return 0; }
-
     bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-
     bool useDefaultImplementationForConstants() const override { return true; }
 
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
-        size_t number_of_arguments = arguments.size();
+        const size_t number_of_arguments = arguments.size();
 
         if (number_of_arguments < 2 || number_of_arguments > 3)
             throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: "
                             "passed {}, should be 2 or 3", getName(), number_of_arguments);
 
-        if ((is_utf8 && !isString(arguments[0])) || (!isStringOrFixedString(arguments[0]) && !isEnum(arguments[0])))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
-                            arguments[0]->getName(), getName());
+        if constexpr (is_utf8)
+        {
+            /// UTF8 variant is not available for FixedString and Enum arguments.
+            if (!isString(arguments[0]))
+                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}",
+                                arguments[0]->getName(), getName());
+        }
+        else
+        {
+            if (!isStringOrFixedString(arguments[0]) && !isEnum(arguments[0]))
+                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}",
+                                arguments[0]->getName(), getName());
+        }
 
         if (!isNativeNumber(arguments[1]))
             throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}",
@@ -78,44 +78,40 @@ public:
     }
 
     template <typename Source>
-    ColumnPtr executeForSource(const ColumnPtr & column_start, const ColumnPtr & column_length,
-                          const ColumnConst * column_start_const, const ColumnConst * column_length_const,
-                          Int64 start_value, Int64 length_value, Source && source,
-                          size_t input_rows_count) const
+    ColumnPtr executeForSource(const ColumnPtr & column_offset, const ColumnPtr & column_length,
+                          bool column_offset_const, bool column_length_const,
+                          Int64 offset, Int64 length,
+                          Source && source, size_t input_rows_count) const
     {
         auto col_res = ColumnString::create();
 
         if (!column_length)
         {
-            if (column_start_const)
+            if (column_offset_const)
             {
-                if (start_value > 0)
-                    sliceFromLeftConstantOffsetUnbounded(
-                        source, StringSink(*col_res, input_rows_count), static_cast<size_t>(start_value - 1));
-                else if (start_value < 0)
-                    sliceFromRightConstantOffsetUnbounded(
-                        source, StringSink(*col_res, input_rows_count), -static_cast<size_t>(start_value));
+                if (offset > 0)
+                    sliceFromLeftConstantOffsetUnbounded(source, StringSink(*col_res, input_rows_count), static_cast<size_t>(offset - 1));
+                else if (offset < 0)
+                    sliceFromRightConstantOffsetUnbounded(source, StringSink(*col_res, input_rows_count), -static_cast<size_t>(offset));
                 else
                     throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Indices in strings are 1-based");
             }
             else
-                sliceDynamicOffsetUnbounded(source, StringSink(*col_res, input_rows_count), *column_start);
+                sliceDynamicOffsetUnbounded(source, StringSink(*col_res, input_rows_count), *column_offset);
         }
         else
         {
-            if (column_start_const && column_length_const)
+            if (column_offset_const && column_length_const)
             {
-                if (start_value > 0)
-                    sliceFromLeftConstantOffsetBounded(
-                        source, StringSink(*col_res, input_rows_count), static_cast<size_t>(start_value - 1), length_value);
-                else if (start_value < 0)
-                    sliceFromRightConstantOffsetBounded(
-                        source, StringSink(*col_res, input_rows_count), -static_cast<size_t>(start_value), length_value);
+                if (offset > 0)
+                    sliceFromLeftConstantOffsetBounded(source, StringSink(*col_res, input_rows_count), static_cast<size_t>(offset - 1), length);
+                else if (offset < 0)
+                    sliceFromRightConstantOffsetBounded(source, StringSink(*col_res, input_rows_count), -static_cast<size_t>(offset), length);
                 else
                     throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Indices in strings are 1-based");
             }
             else
-                sliceDynamicOffsetBounded(source, StringSink(*col_res, input_rows_count), *column_start, *column_length);
+                sliceDynamicOffsetBounded(source, StringSink(*col_res, input_rows_count), *column_offset, *column_length);
         }
 
         return col_res;
@@ -123,71 +119,60 @@ public:
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {
-        size_t number_of_arguments = arguments.size();
+        const size_t number_of_arguments = arguments.size();
 
-        ColumnPtr column_arg0 = arguments[0].column;
-        ColumnPtr column_start = arguments[1].column;
+        ColumnPtr column_string = arguments[0].column;
+        ColumnPtr column_offset = arguments[1].column;
         ColumnPtr column_length;
-
         if (number_of_arguments == 3)
             column_length = arguments[2].column;
 
-        const ColumnConst * column_start_const = checkAndGetColumn<ColumnConst>(column_start.get());
+        const ColumnConst * column_offset_const = checkAndGetColumn<ColumnConst>(column_offset.get());
         const ColumnConst * column_length_const = nullptr;
-
         if (number_of_arguments == 3)
             column_length_const = checkAndGetColumn<ColumnConst>(column_length.get());
 
-        Int64 start_value = 0;
-        Int64 length_value = 0;
+        Int64 offset = 0;
+        Int64 length = 0;
 
-        if (column_start_const)
-            start_value = column_start_const->getInt(0);
+        if (column_offset_const)
+            offset = column_offset_const->getInt(0);
         if (column_length_const)
-            length_value = column_length_const->getInt(0);
+            length = column_length_const->getInt(0);
 
         if constexpr (is_utf8)
         {
-            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_arg0.get()))
-                return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                length_value, UTF8StringSource(*col), input_rows_count);
-            if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_arg0.get()))
-                return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                length_value, ConstSource<UTF8StringSource>(*col_const), input_rows_count);
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
-                arguments[0].column->getName(), getName());
+            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
+                return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, UTF8StringSource(*col), input_rows_count);
+            if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
+                return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource<UTF8StringSource>(*col_const), input_rows_count);
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName());
         }
         else
         {
-            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_arg0.get()))
-                return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                        length_value, StringSource(*col), input_rows_count);
-            if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_arg0.get()))
-                return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                        length_value, FixedStringSource(*col_fixed), input_rows_count);
-            if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_arg0.get()))
-                return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                        length_value, ConstSource<StringSource>(*col_const), input_rows_count);
-            if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_arg0.get()))
-                return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                        length_value, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
-            if (isEnum8(arguments[0].type))
-                if (const ColumnVector<Int8> * col_enum8 = checkAndGetColumn<ColumnVector<Int8>>(column_arg0.get()))
+            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
+                return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, StringSource(*col), input_rows_count);
+            if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))
+                return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, FixedStringSource(*col_fixed), input_rows_count);
+            if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
+                return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource<StringSource>(*col_const), input_rows_count);
+            if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
+                return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
+            if (isEnum(arguments[0].type))
+            {
+                if (const typename DataTypeEnum8::ColumnType * col_enum8 = checkAndGetColumn<typename DataTypeEnum8::ColumnType>(column_string.get()))
                 {
-                    const auto * enum_type = typeid_cast<const DataTypeEnum<Int8> *>(arguments[0].type.get());
-                    return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                            length_value, EnumSource<Int8>(*col_enum8, *enum_type), input_rows_count);
+                    const auto * type_enum8 = assert_cast<const DataTypeEnum8 *>(arguments[0].type.get());
+                    return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, EnumSource<DataTypeEnum8>(*col_enum8, *type_enum8), input_rows_count);
                 }
-            if (isEnum16(arguments[0].type))
-                if (const ColumnVector<Int16> * col_enum16 = checkAndGetColumn<ColumnVector<Int16>>(column_arg0.get()))
+                if (const typename DataTypeEnum16::ColumnType * col_enum16 = checkAndGetColumn<typename DataTypeEnum16::ColumnType>(column_string.get()))
                 {
-                    const auto * enum_type = typeid_cast<const DataTypeEnum<Int16> *>(arguments[0].type.get());
-                    return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
-                                            length_value, EnumSource<Int16>(*col_enum16, *enum_type), input_rows_count);
+                    const auto * type_enum16 = assert_cast<const DataTypeEnum16 *>(arguments[0].type.get());
+                    return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, EnumSource<DataTypeEnum16>(*col_enum16, *type_enum16), input_rows_count);
                 }
+            }
 
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
-                arguments[0].column->getName(), getName());
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName());
         }
     }
 };
@@ -197,8 +182,8 @@ public:
 REGISTER_FUNCTION(Substring)
 {
     factory.registerFunction<FunctionSubstring<false>>({}, FunctionFactory::CaseInsensitive);
-    factory.registerAlias("substr", "substring", FunctionFactory::CaseInsensitive);
-    factory.registerAlias("mid", "substring", FunctionFactory::CaseInsensitive); /// from MySQL dialect
+    factory.registerAlias("substr", "substring", FunctionFactory::CaseInsensitive); // MySQL alias
+    factory.registerAlias("mid", "substring", FunctionFactory::CaseInsensitive); /// MySQL alias
 
     factory.registerFunction<FunctionSubstring<true>>({}, FunctionFactory::CaseSensitive);
 }
diff --git a/tests/queries/0_stateless/00493_substring_of_enum.sql b/tests/queries/0_stateless/00493_substring_of_enum.sql
index ba9fc630490..cdda76e88a7 100644
--- a/tests/queries/0_stateless/00493_substring_of_enum.sql
+++ b/tests/queries/0_stateless/00493_substring_of_enum.sql
@@ -1,6 +1,6 @@
-DROP TABLE IF EXISTS substring_enums_test;
-CREATE TABLE substring_enums_test(e8 Enum('hello' = -5, 'world' = 15), e16 Enum('shark' = -999, 'eagle' = 9999)) ENGINE MergeTree ORDER BY tuple();
-INSERT INTO TABLE substring_enums_test VALUES ('hello', 'shark'), ('world', 'eagle');
+DROP TABLE IF EXISTS tab;
+CREATE TABLE tab(e8 Enum8('hello' = -5, 'world' = 15), e16 Enum16('shark' = -999, 'eagle' = 9999)) ENGINE MergeTree ORDER BY tuple();
+INSERT INTO TABLE tab VALUES ('hello', 'shark'), ('world', 'eagle');
 
 SELECT '-- Positive offsets (slice from left)';
 WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6),
@@ -8,7 +8,7 @@ WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6),
 SELECT 'Offset: ', p.offset, 'Length: ', p.length,
        substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2,
        substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4
-FROM substring_enums_test LEFT JOIN permutations AS p ON true;
+FROM tab LEFT JOIN permutations AS p ON true;
 
 SELECT '-- Negative offsets (slice from right)';
 WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6),
@@ -16,7 +16,7 @@ WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6),
 SELECT 'Offset: ', p.offset, 'Length: ', p.length,
        substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2,
        substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4
-FROM substring_enums_test LEFT JOIN permutations AS p ON true;
+FROM tab LEFT JOIN permutations AS p ON true;
 
 SELECT '-- Zero offset/length';
 WITH cte AS (SELECT number AS n FROM system.numbers LIMIT 2),
@@ -24,9 +24,9 @@ WITH cte AS (SELECT number AS n FROM system.numbers LIMIT 2),
 SELECT 'Offset: ', p.offset, 'Length: ', p.length,
        substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2,
        substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4
-FROM substring_enums_test LEFT JOIN permutations AS p ON true;
+FROM tab LEFT JOIN permutations AS p ON true;
 
 SELECT '-- Constant enums';
 SELECT substring(CAST('foo', 'Enum8(\'foo\' = 1)'), 1, 1), substring(CAST('foo', 'Enum16(\'foo\' = 1111)'), 1, 2);
 
-DROP TABLE substring_enums_test;
+DROP TABLE tab;

From b493ce23852dc74e9001832485a2ad00e966e6c2 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Wed, 29 Nov 2023 14:19:38 +0000
Subject: [PATCH 040/213] Better JSON -> JSONEachRow fallback without catching
 exceptions

---
 src/Formats/JSONUtils.cpp                     | 116 ++++++++++++++++++
 src/Formats/JSONUtils.h                       |   3 +
 .../Formats/Impl/JSONRowInputFormat.cpp       |  50 +++-----
 3 files changed, 135 insertions(+), 34 deletions(-)

diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp
index 349945bbd54..7ddfdb6b572 100644
--- a/src/Formats/JSONUtils.cpp
+++ b/src/Formats/JSONUtils.cpp
@@ -564,6 +564,15 @@ namespace JSONUtils
         skipWhitespaceIfAny(in);
     }
 
+    bool checkAndSkipColon(ReadBuffer & in)
+    {
+        skipWhitespaceIfAny(in);
+        const bool colon_found = checkChar(':', in); /// A missing ':' is reported via the return value.
+        if (colon_found)
+            skipWhitespaceIfAny(in); /// Trailing whitespace is consumed only after a successful match.
+        return colon_found;
+    }
+
     String readFieldName(ReadBuffer & in)
     {
         skipWhitespaceIfAny(in);
@@ -573,6 +582,12 @@ namespace JSONUtils
         return field;
     }
 
+    bool tryReadFieldName(ReadBuffer & in, String & field)
+    {
+        skipWhitespaceIfAny(in);
+        return tryReadJSONStringInto(field, in) && checkAndSkipColon(in); /// The name must be followed by ':'; false on any mismatch.
+    }
+
     String readStringField(ReadBuffer & in)
     {
         skipWhitespaceIfAny(in);
@@ -582,6 +597,15 @@ namespace JSONUtils
         return value;
     }
 
+    bool tryReadStringField(ReadBuffer & in, String & value)
+    {
+        skipWhitespaceIfAny(in);
+        const bool parsed = tryReadJSONStringInto(value, in); /// A parse failure is reported via the return value.
+        if (parsed)
+            skipWhitespaceIfAny(in); /// Trailing whitespace is consumed only after a successful read.
+        return parsed;
+    }
+
     void skipArrayStart(ReadBuffer & in)
     {
         skipWhitespaceIfAny(in);
@@ -628,6 +652,15 @@ namespace JSONUtils
         skipWhitespaceIfAny(in);
     }
 
+    bool checkAndSkipObjectStart(ReadBuffer & in)
+    {
+        skipWhitespaceIfAny(in);
+        const bool brace_found = checkChar('{', in); /// A missing '{' is reported via the return value.
+        if (brace_found)
+            skipWhitespaceIfAny(in); /// Trailing whitespace is consumed only after a successful match.
+        return brace_found;
+    }
+
     bool checkAndSkipObjectEnd(ReadBuffer & in)
     {
         skipWhitespaceIfAny(in);
@@ -644,6 +677,15 @@ namespace JSONUtils
         skipWhitespaceIfAny(in);
     }
 
+    bool checkAndSkipComma(ReadBuffer & in)
+    {
+        skipWhitespaceIfAny(in);
+        const bool comma_found = checkChar(',', in); /// A missing ',' is reported via the return value.
+        if (comma_found)
+            skipWhitespaceIfAny(in); /// Trailing whitespace is consumed only after a successful match.
+        return comma_found;
+    }
+
     std::pair<String, String> readStringFieldNameAndValue(ReadBuffer & in)
     {
         auto field_name = readFieldName(in);
@@ -651,6 +693,11 @@ namespace JSONUtils
         return {field_name, field_value};
     }
 
+    bool tryReadStringFieldNameAndValue(ReadBuffer & in, std::pair<String, String> & field_and_value)
+    {
+        return tryReadFieldName(in, field_and_value.first) && tryReadStringField(in, field_and_value.second); /// first = field name, second = string value
+    }
+
     NameAndTypePair readObjectWithNameAndType(ReadBuffer & in)
     {
         skipObjectStart(in);
@@ -673,6 +720,44 @@ namespace JSONUtils
         return name_and_type;
     }
 
+    bool tryReadObjectWithNameAndType(ReadBuffer & in, NameAndTypePair & name_and_type)
+    {
+        /// Expected input: {"name": "<column>", "type": "<type>"}; the two keys may come in either order.
+        /// Returns false on any mismatch. name_and_type is assigned only once the type name has been resolved.
+        if (!checkAndSkipObjectStart(in))
+            return false;
+
+        std::pair<String, String> lhs;
+        std::pair<String, String> rhs;
+        if (!tryReadStringFieldNameAndValue(in, lhs) || !checkAndSkipComma(in) || !tryReadStringFieldNameAndValue(in, rhs))
+            return false;
+
+        const String * column_name = nullptr;
+        const String * type_name = nullptr;
+        if (lhs.first == "name" && rhs.first == "type")
+        {
+            column_name = &lhs.second;
+            type_name = &rhs.second;
+        }
+        else if (rhs.first == "name" && lhs.first == "type")
+        {
+            column_name = &rhs.second;
+            type_name = &lhs.second;
+        }
+        else
+        {
+            return false;
+        }
+
+        /// tryGet is expected to return null for a type name it cannot resolve; that is treated as a mismatch here.
+        auto type = DataTypeFactory::instance().tryGet(*type_name);
+        if (!type)
+            return false;
+
+        name_and_type = {*column_name, type};
+        return checkAndSkipObjectEnd(in);
+    }
+
     NamesAndTypesList readMetadata(ReadBuffer & in)
     {
         auto field_name = readFieldName(in);
@@ -693,6 +778,37 @@ namespace JSONUtils
         return names_and_types;
     }
 
+    bool tryReadMetadata(ReadBuffer & in, NamesAndTypesList & names_and_types)
+    {
+        /// Parses `"meta": [{"name": ..., "type": ...}, ...]` and appends each column to names_and_types.
+        /// Returns false on the first mismatch; columns appended before it stay in the list.
+        /// The opening '{' of the enclosing object must already be consumed by the caller.
+        String key;
+        const bool is_meta_key = tryReadFieldName(in, key) && key == "meta";
+        if (!is_meta_key)
+            return false;
+
+        if (!checkAndSkipArrayStart(in))
+            return false;
+
+        bool expect_comma = false;
+        while (!checkAndSkipArrayEnd(in))
+        {
+            /// Every element except the first one must be preceded by a comma.
+            if (expect_comma && !checkAndSkipComma(in))
+                return false;
+            expect_comma = true;
+
+            NameAndTypePair column;
+            if (!tryReadObjectWithNameAndType(in, column))
+                return false;
+            names_and_types.push_back(column);
+        }
+
+        /// An empty resulting list is reported as a failure.
+        return !names_and_types.empty();
+    }
+
     void validateMetadataByHeader(const NamesAndTypesList & names_and_types_from_metadata, const Block & header)
     {
         for (const auto & [name, type] : names_and_types_from_metadata)
diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h
index cd6b5ff8171..a770ded9687 100644
--- a/src/Formats/JSONUtils.h
+++ b/src/Formats/JSONUtils.h
@@ -112,6 +112,7 @@ namespace JSONUtils
 
     void skipColon(ReadBuffer & in);
     void skipComma(ReadBuffer & in);
+    bool checkAndSkipComma(ReadBuffer & in);
 
     String readFieldName(ReadBuffer & in);
 
@@ -122,9 +123,11 @@ namespace JSONUtils
 
     void skipObjectStart(ReadBuffer & in);
     void skipObjectEnd(ReadBuffer & in);
+    bool checkAndSkipObjectStart(ReadBuffer & in);
     bool checkAndSkipObjectEnd(ReadBuffer & in);
 
     NamesAndTypesList readMetadata(ReadBuffer & in);
+    bool tryReadMetadata(ReadBuffer & in, NamesAndTypesList & names_and_types);
     NamesAndTypesList readMetadataAndValidateHeader(ReadBuffer & in, const Block & header);
     void validateMetadataByHeader(const NamesAndTypesList & names_and_types_from_metadata, const Block & header);
 
diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
index f083a00f766..fc4c868b604 100644
--- a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
@@ -30,38 +30,24 @@ void JSONRowInputFormat::readPrefix()
     NamesAndTypesList names_and_types_from_metadata;
 
     /// Try to parse metadata, if failed, try to parse data as JSONEachRow format.
-    try
+    if (JSONUtils::checkAndSkipObjectStart(*peekable_buf)
+        && JSONUtils::tryReadMetadata(*peekable_buf, names_and_types_from_metadata)
+        && JSONUtils::checkAndSkipComma(*peekable_buf)
+        && JSONUtils::skipUntilFieldInObject(*peekable_buf, "data")
+        && JSONUtils::checkAndSkipArrayStart(*peekable_buf))
     {
-        JSONUtils::skipObjectStart(*peekable_buf);
-        names_and_types_from_metadata = JSONUtils::readMetadata(*peekable_buf);
-        JSONUtils::skipComma(*peekable_buf);
-        if (!JSONUtils::skipUntilFieldInObject(*peekable_buf, "data"))
-            throw Exception(ErrorCodes::INCORRECT_DATA, "Expected field \"data\" with table content");
-
-        JSONUtils::skipArrayStart(*peekable_buf);
         data_in_square_brackets = true;
+        if (validate_types_from_metadata)
+        {
+            JSONUtils::validateMetadataByHeader(names_and_types_from_metadata, getPort().getHeader());
+        }
     }
-    catch (const ParsingException &)
+    else
     {
         parse_as_json_each_row = true;
-    }
-    catch (const Exception & e)
-    {
-        if (e.code() != ErrorCodes::INCORRECT_DATA)
-            throw;
-
-        parse_as_json_each_row = true;
-    }
-
-    if (parse_as_json_each_row)
-    {
         peekable_buf->rollbackToCheckpoint();
         JSONEachRowRowInputFormat::readPrefix();
     }
-    else if (validate_types_from_metadata)
-    {
-        JSONUtils::validateMetadataByHeader(names_and_types_from_metadata, getPort().getHeader());
-    }
 }
 
 void JSONRowInputFormat::readSuffix()
@@ -103,16 +89,12 @@ NamesAndTypesList JSONRowSchemaReader::readSchema()
     skipBOMIfExists(*peekable_buf);
     PeekableReadBufferCheckpoint checkpoint(*peekable_buf);
     /// Try to parse metadata, if failed, try to parse data as JSONEachRow format
-    try
-    {
-        JSONUtils::skipObjectStart(*peekable_buf);
-        return JSONUtils::readMetadata(*peekable_buf);
-    }
-    catch (...)
-    {
-        peekable_buf->rollbackToCheckpoint(true);
-        return JSONEachRowSchemaReader::readSchema();
-    }
+    NamesAndTypesList names_and_types;
+    if (JSONUtils::checkAndSkipObjectStart(*peekable_buf) && JSONUtils::tryReadMetadata(*peekable_buf, names_and_types))
+        return names_and_types;
+
+    peekable_buf->rollbackToCheckpoint(true);
+    return JSONEachRowSchemaReader::readSchema();
 }
 
 void registerInputFormatJSON(FormatFactory & factory)

From 1d840ca02693795890d8bd2ab447a7f7536ce888 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Wed, 29 Nov 2023 20:49:53 +0100
Subject: [PATCH 041/213] Fix style

---
 src/Processors/Formats/Impl/JSONRowInputFormat.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
index fc4c868b604..fcc7f0f8381 100644
--- a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
@@ -7,11 +7,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int INCORRECT_DATA;
-}
-
 JSONRowInputFormat::JSONRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_)
     : JSONRowInputFormat(std::make_unique<PeekableReadBuffer>(in_), header_, params_, format_settings_)
 {

From d9f4b4d2c05d553d287f4f3551c406fe599b50d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Thu, 30 Nov 2023 10:56:31 +0800
Subject: [PATCH 042/213] support parts, tables, databases

---
 programs/server/Server.cpp                    |  3 +
 src/Core/ServerSettings.h                     |  3 +
 src/Databases/DatabaseLazy.cpp                |  7 ++-
 src/Databases/DatabaseOnDisk.cpp              |  1 -
 src/Databases/DatabasesCommon.cpp             |  2 -
 src/Databases/IDatabase.cpp                   | 15 +++++
 src/Databases/IDatabase.h                     |  4 +-
 src/Interpreters/Context.cpp                  | 34 +++++++++++
 src/Interpreters/Context.h                    |  3 +
 tests/config/config.d/max_num_to_warn.xml     |  5 ++
 tests/config/install.sh                       |  1 +
 .../02931_max_num_to_warn.reference           |  3 +
 .../0_stateless/02931_max_num_to_warn.sql     | 61 +++++++++++++++++++
 13 files changed, 136 insertions(+), 6 deletions(-)
 create mode 100644 tests/config/config.d/max_num_to_warn.xml
 create mode 100644 tests/queries/0_stateless/02931_max_num_to_warn.reference
 create mode 100644 tests/queries/0_stateless/02931_max_num_to_warn.sql

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 11ad06640c8..c3d7bff00d6 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1280,6 +1280,9 @@ try
 
             global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
             global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
+            global_context->setMaxTableNumToWarn(server_settings_.max_table_num_to_warn);
+            global_context->setMaxDatabaseNumToWarn(server_settings_.max_database_num_to_warn);
+            global_context->setMaxPartNumToWarn(server_settings_.max_part_num_to_warn);
 
             ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
             if (server_settings_.concurrent_threads_soft_limit_num > 0 && server_settings_.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index eca4b7424de..0a48c73c1fd 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -79,6 +79,9 @@ namespace DB
     \
     M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \
     M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
+    M(UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0) \
+    M(UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \
+    M(UInt64, max_part_num_to_warn, 100000lu, "If number of active parts is greater than this value, server will create a warning that will displayed to user.", 0) \
     M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \
     M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \
     \
diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp
index 9070c0b6ee7..75e4b19d628 100644
--- a/src/Databases/DatabaseLazy.cpp
+++ b/src/Databases/DatabaseLazy.cpp
@@ -18,6 +18,10 @@
 
 namespace fs = std::filesystem;
 
+namespace CurrentMetrics {
+    extern const Metric AttachedTable; /// Adjusted in DatabaseLazy::attachTable / detachTable below.
+}
+
 namespace DB
 {
 
@@ -64,7 +68,6 @@ void DatabaseLazy::createTable(
     SCOPE_EXIT_MEMORY_SAFE({ clearExpiredTables(); });
     if (!endsWith(table->getName(), "Log"))
         throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lazy engine can be used only with *Log tables.");
-    std::cout<<"======= Flag 1"<<std::endl;
     DatabaseOnDisk::createTable(local_context, table_name, table, query);
 
     /// DatabaseOnDisk::createTable renames file, so we need to get new metadata_modification_time.
@@ -175,6 +178,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n
         throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name));
 
     it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name);
+    CurrentMetrics::add(CurrentMetrics::AttachedTable, 1);
 }
 
 StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name)
@@ -190,6 +194,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta
         if (it->second.expiration_iterator != cache_expiration_queue.end())
             cache_expiration_queue.erase(it->second.expiration_iterator);
         tables_cache.erase(it);
+        CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1);
     }
     return res;
 }
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index f1fc6bbdd7a..549711150b8 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -263,7 +263,6 @@ void DatabaseOnDisk::commitCreateTable(const ASTCreateQuery & query, const Stora
     try
     {
         /// Add a table to the map of known tables.
-        std::cout<<"===== flag 3"<<std::endl;
         attachTable(query_context, query.getTable(), table, getTableDataPath(query));
 
         /// If it was ATTACH query and file with table metadata already exist
diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp
index 2bdca2398b5..e451f3a388e 100644
--- a/src/Databases/DatabasesCommon.cpp
+++ b/src/Databases/DatabasesCommon.cpp
@@ -263,14 +263,12 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n
 
 void DatabaseWithOwnTablesBase::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &)
 {
-    std::cout<<"========= Flag 5"<<std::endl;
     std::lock_guard lock(mutex);
     attachTableUnlocked(table_name, table);
 }
 
 void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, const StoragePtr & table)
 {
-    std::cout<<"========= Flag 6"<<std::endl;
     auto table_id = table->getStorageID();
     if (table_id.database_name != database_name)
         throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed to `{}`, cannot create table in `{}`",
diff --git a/src/Databases/IDatabase.cpp b/src/Databases/IDatabase.cpp
index 09640d2f86e..b4fd5ea4612 100644
--- a/src/Databases/IDatabase.cpp
+++ b/src/Databases/IDatabase.cpp
@@ -5,8 +5,14 @@
 #include <Common/quoteString.h>
 #include <Interpreters/DatabaseCatalog.h>
 #include <Common/NamePrompter.h>
+#include <Common/CurrentMetrics.h>
 
 
+namespace CurrentMetrics
+{
+    extern const Metric AttachedDatabase;
+}
+
 namespace DB
 {
 
@@ -29,6 +35,15 @@ StoragePtr IDatabase::getTable(const String & name, ContextPtr context) const
         throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} does not exist. Maybe you meant {}?", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name), backQuoteIfNeed(names[0]));
 }
 
+IDatabase::IDatabase(String database_name_) : database_name(std::move(database_name_)) {
+    CurrentMetrics::add(CurrentMetrics::AttachedDatabase, 1); /// Each constructed IDatabase counts as one attached database until it is destroyed.
+}
+
+IDatabase::~IDatabase()
+{
+    CurrentMetrics::sub(CurrentMetrics::AttachedDatabase, 1); /// Balances the increment done in the constructor.
+}
+
 std::vector<std::pair<ASTPtr, StoragePtr>> IDatabase::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const
 {
     /// Cannot backup any table because IDatabase doesn't own any tables.
diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h
index e886f1adae3..7a8f9cb2e2b 100644
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@@ -122,7 +122,7 @@ class IDatabase : public std::enable_shared_from_this<IDatabase>
 {
 public:
     IDatabase() = delete;
-    explicit IDatabase(String database_name_) : database_name(std::move(database_name_)) {}
+    explicit IDatabase(String database_name_);
 
     /// Get name of database engine.
     virtual String getEngineName() const = 0;
@@ -357,7 +357,7 @@ public:
     /// Creates a table restored from backup.
     virtual void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr context, std::shared_ptr<IRestoreCoordination> restore_coordination, UInt64 timeout_ms);
 
-    virtual ~IDatabase() = default;
+    virtual ~IDatabase();
 
 protected:
     virtual ASTPtr getCreateTableQueryImpl(const String & /*name*/, ContextPtr /*context*/, bool throw_on_error) const
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 185f9782da5..a2c94d59826 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -142,6 +142,9 @@ namespace CurrentMetrics
     extern const Metric IOWriterThreads;
     extern const Metric IOWriterThreadsActive;
     extern const Metric IOWriterThreadsScheduled;
+    extern const Metric AttachedTable;
+    extern const Metric AttachedDatabase;
+    extern const Metric PartsActive;
 }
 
 
@@ -323,6 +326,10 @@ struct ContextSharedPart : boost::noncopyable
     std::optional<MergeTreeSettings> replicated_merge_tree_settings TSA_GUARDED_BY(mutex);   /// Settings of ReplicatedMergeTree* engines.
     std::atomic_size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default)
     std::atomic_size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default)
+    /// Thresholds for the object-count warnings added in Context::getWarnings(); set via Context::setMax*NumToWarn().
+    std::atomic_size_t max_database_num_to_warn = 1000lu;
+    std::atomic_size_t max_table_num_to_warn = 5000lu;
+    std::atomic_size_t max_part_num_to_warn = 100000lu;
     /// No lock required for format_schema_path modified only during initialization
     String format_schema_path;                              /// Path to a directory that contains schema files used by input formats.
     mutable OnceFlag action_locks_manager_initialized;
@@ -829,6 +836,15 @@ Strings Context::getWarnings() const
     {
         SharedLockGuard lock(shared->mutex);
         common_warnings = shared->warnings;
+        /// Add a warning for every CurrentMetrics counter that exceeds its configured threshold.
+        if (CurrentMetrics::get(CurrentMetrics::AttachedTable) > static_cast<DB::Int64>(shared->max_table_num_to_warn))
+            common_warnings.emplace_back(fmt::format("Attached tables is more than {}", shared->max_table_num_to_warn));
+
+        if (CurrentMetrics::get(CurrentMetrics::AttachedDatabase) > static_cast<DB::Int64>(shared->max_database_num_to_warn))
+            common_warnings.emplace_back(fmt::format("Attached databases is more than {}", shared->max_database_num_to_warn));
+
+        if (CurrentMetrics::get(CurrentMetrics::PartsActive) > static_cast<DB::Int64>(shared->max_part_num_to_warn))
+            common_warnings.emplace_back(fmt::format("Active parts is more than {}", shared->max_part_num_to_warn));
     }
     /// Make setting's name ordered
     std::set<String> obsolete_settings;
@@ -3322,6 +3338,24 @@ UInt16 Context::getServerPort(const String & port_name) const
         return it->second;
 }
 
+void Context::setMaxPartNumToWarn(size_t max_part_to_warn)
+{
+    SharedLockGuard lock(shared->mutex); /// The threshold is a std::atomic, so a shared lock is enough for this store.
+    shared->max_part_num_to_warn = max_part_to_warn; /// Compared against CurrentMetrics::PartsActive in getWarnings().
+}
+
+void Context::setMaxTableNumToWarn(size_t max_table_to_warn)
+{
+    SharedLockGuard lock(shared->mutex); /// The threshold is a std::atomic, so a shared lock is enough for this store.
+    shared->max_table_num_to_warn = max_table_to_warn; /// Compared against CurrentMetrics::AttachedTable in getWarnings().
+}
+
+void Context::setMaxDatabaseNumToWarn(size_t max_database_to_warn)
+{
+    SharedLockGuard lock(shared->mutex); /// The threshold is a std::atomic, so a shared lock is enough for this store.
+    shared->max_database_num_to_warn = max_database_to_warn; /// Compared against CurrentMetrics::AttachedDatabase in getWarnings().
+}
+
 std::shared_ptr<Cluster> Context::getCluster(const std::string & cluster_name) const
 {
     if (auto res = tryGetCluster(cluster_name))
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 7ae567548dd..517ccded6bd 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -838,6 +838,9 @@ public:
     void setHTTPHeaderFilter(const Poco::Util::AbstractConfiguration & config);
     const HTTPHeaderFilter & getHTTPHeaderFilter() const;
 
+    void setMaxTableNumToWarn(size_t max_table_to_warn); /// Attached-table count above which getWarnings() adds a warning.
+    void setMaxDatabaseNumToWarn(size_t max_database_to_warn); /// Attached-database count above which getWarnings() adds a warning.
+    void setMaxPartNumToWarn(size_t max_part_to_warn); /// Active-part count above which getWarnings() adds a warning.
     /// The port that the server listens for executing SQL queries.
     UInt16 getTCPPort() const;
 
diff --git a/tests/config/config.d/max_num_to_warn.xml b/tests/config/config.d/max_num_to_warn.xml
new file mode 100644
index 00000000000..77d68998f8e
--- /dev/null
+++ b/tests/config/config.d/max_num_to_warn.xml
@@ -0,0 +1,5 @@
+<clickhouse>
+    <max_table_num_to_warn>10</max_table_num_to_warn>
+    <max_database_num_to_warn>10</max_database_num_to_warn>
+    <max_part_num_to_warn>10</max_part_num_to_warn>
+</clickhouse>
diff --git a/tests/config/install.sh b/tests/config/install.sh
index 417a413bbec..9538b59ad9d 100755
--- a/tests/config/install.sh
+++ b/tests/config/install.sh
@@ -16,6 +16,7 @@ mkdir -p $DEST_SERVER_PATH/users.d/
 mkdir -p $DEST_CLIENT_PATH
 
 ln -sf $SRC_PATH/config.d/zookeeper_write.xml $DEST_SERVER_PATH/config.d/
+ln -sf $SRC_PATH/config.d/max_num_to_warn.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/listen.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/text_log.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/blob_storage_log.xml $DEST_SERVER_PATH/config.d/
diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.reference b/tests/queries/0_stateless/02931_max_num_to_warn.reference
new file mode 100644
index 00000000000..76d86352bfc
--- /dev/null
+++ b/tests/queries/0_stateless/02931_max_num_to_warn.reference
@@ -0,0 +1,3 @@
+Attached tables is more than 10
+Attached databases is more than 10
+Active parts is more than 10
diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.sql b/tests/queries/0_stateless/02931_max_num_to_warn.sql
new file mode 100644
index 00000000000..cd9a4ebe5fa
--- /dev/null
+++ b/tests/queries/0_stateless/02931_max_num_to_warn.sql
@@ -0,0 +1,61 @@
+CREATE TABLE test_max_num_to_warn_1 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_2 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_3 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_4 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_5 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_6 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_7 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_8 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_9 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_10 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_11 (id Int32, str String) Engine=Memory;
+
+CREATE DATABASE test_max_num_to_warn_1;
+CREATE DATABASE test_max_num_to_warn_2;
+CREATE DATABASE test_max_num_to_warn_3;
+CREATE DATABASE test_max_num_to_warn_4;
+CREATE DATABASE test_max_num_to_warn_5;
+CREATE DATABASE test_max_num_to_warn_6;
+CREATE DATABASE test_max_num_to_warn_7;
+CREATE DATABASE test_max_num_to_warn_8;
+CREATE DATABASE test_max_num_to_warn_9;
+CREATE DATABASE test_max_num_to_warn_10;
+CREATE DATABASE test_max_num_to_warn_11;
+
+INSERT INTO test_max_num_to_warn_1 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_2 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_3 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_4 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_5 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_6 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_7 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_8 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_9 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_10 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_11 VALUES (1, 'Hello');
+
+SELECT * FROM system.warnings where message in ('Attached tables is more than 10', 'Attached databases is more than 10', 'Active parts is more than 10'); -- NOTE(review): only Memory tables are filled above; the 'Active parts' row presumably relies on parts of other tables on the server - confirm.
+
+DROP TABLE test_max_num_to_warn_1;
+DROP TABLE test_max_num_to_warn_2;
+DROP TABLE test_max_num_to_warn_3;
+DROP TABLE test_max_num_to_warn_4;
+DROP TABLE test_max_num_to_warn_5;
+DROP TABLE test_max_num_to_warn_6;
+DROP TABLE test_max_num_to_warn_7;
+DROP TABLE test_max_num_to_warn_8;
+DROP TABLE test_max_num_to_warn_9;
+DROP TABLE test_max_num_to_warn_10;
+DROP TABLE test_max_num_to_warn_11;
+
+DROP DATABASE test_max_num_to_warn_1;
+DROP DATABASE test_max_num_to_warn_2;
+DROP DATABASE test_max_num_to_warn_3;
+DROP DATABASE test_max_num_to_warn_4;
+DROP DATABASE test_max_num_to_warn_5;
+DROP DATABASE test_max_num_to_warn_6;
+DROP DATABASE test_max_num_to_warn_7;
+DROP DATABASE test_max_num_to_warn_8;
+DROP DATABASE test_max_num_to_warn_9;
+DROP DATABASE test_max_num_to_warn_10;
+DROP DATABASE test_max_num_to_warn_11;

From 64bd77fe9c0131ba9d9699c421e231e92242023a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 30 Nov 2023 14:05:19 +0100
Subject: [PATCH 043/213] Do not randomize optimize_functions_to_subcolumns

---
 tests/clickhouse-test                                           | 1 -
 tests/queries/0_stateless/02498_analyzer_settings_push_down.sql | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index dc6614471a4..fcd8c7ba0b4 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -556,7 +556,6 @@ class SettingsRandomizer:
         "max_block_size": lambda: random.randint(8000, 100000),
         "max_threads": lambda: random.randint(1, 64),
         "optimize_append_index": lambda: random.randint(1, 1),
-        "optimize_functions_to_subcolumns": lambda: random.randint(1, 1),
         "optimize_syntax_fuse_functions": lambda: random.randint(1, 1),
         "optimize_if_chain_to_multiif": lambda: random.randint(1, 1),
         "optimize_if_transform_strings_to_enum": lambda: random.randint(1, 1),
diff --git a/tests/queries/0_stateless/02498_analyzer_settings_push_down.sql b/tests/queries/0_stateless/02498_analyzer_settings_push_down.sql
index 91bdce2cca9..67623869f0a 100644
--- a/tests/queries/0_stateless/02498_analyzer_settings_push_down.sql
+++ b/tests/queries/0_stateless/02498_analyzer_settings_push_down.sql
@@ -1,4 +1,5 @@
 SET allow_experimental_analyzer = 1;
+SET optimize_functions_to_subcolumns = 0;
 
 DROP TABLE IF EXISTS test_table;
 CREATE TABLE test_table (id UInt64, value Tuple(a UInt64)) ENGINE=MergeTree ORDER BY id;

From 3867adbb10a83631a06e0b40c74b1bd4fee7ba3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 30 Nov 2023 19:19:26 +0100
Subject: [PATCH 044/213] Adapt tests and avoid using
 optimize_syntax_fuse_functions at all

---
 tests/clickhouse-test                                            | 1 -
 tests/queries/0_stateless/01300_group_by_other_keys_having.sql   | 1 +
 .../queries/0_stateless/01622_constraints_where_optimization.sql | 1 +
 .../0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql   | 1 +
 4 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index fcd8c7ba0b4..fe93864b202 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -556,7 +556,6 @@ class SettingsRandomizer:
         "max_block_size": lambda: random.randint(8000, 100000),
         "max_threads": lambda: random.randint(1, 64),
         "optimize_append_index": lambda: random.randint(1, 1),
-        "optimize_syntax_fuse_functions": lambda: random.randint(1, 1),
         "optimize_if_chain_to_multiif": lambda: random.randint(1, 1),
         "optimize_if_transform_strings_to_enum": lambda: random.randint(1, 1),
         "optimize_read_in_order": lambda: random.randint(0, 1),
diff --git a/tests/queries/0_stateless/01300_group_by_other_keys_having.sql b/tests/queries/0_stateless/01300_group_by_other_keys_having.sql
index 911f61a62e2..203e8322ad9 100644
--- a/tests/queries/0_stateless/01300_group_by_other_keys_having.sql
+++ b/tests/queries/0_stateless/01300_group_by_other_keys_having.sql
@@ -1,4 +1,5 @@
 set optimize_group_by_function_keys = 1;
+set optimize_syntax_fuse_functions = 0;
 set allow_experimental_analyzer = 1;
 
 -- { echoOn }
diff --git a/tests/queries/0_stateless/01622_constraints_where_optimization.sql b/tests/queries/0_stateless/01622_constraints_where_optimization.sql
index 2818351a120..d41b1988bdd 100644
--- a/tests/queries/0_stateless/01622_constraints_where_optimization.sql
+++ b/tests/queries/0_stateless/01622_constraints_where_optimization.sql
@@ -1,5 +1,6 @@
 SET convert_query_to_cnf = 1;
 SET optimize_using_constraints = 1;
+SET optimize_append_index = 0;
 
 DROP TABLE IF EXISTS t_constraints_where;
 
diff --git a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql
index de9208ef009..fef71fdf94f 100644
--- a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql
+++ b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql
@@ -1,4 +1,5 @@
 set allow_experimental_analyzer = 1;
+set optimize_syntax_fuse_functions = 0;
 
 EXPLAIN QUERY TREE run_passes=1
 SELECT avg(log(2) * number) AS k FROM numbers(10000000)

From 08f943462fa243ef0a18fa27a60be543e25cda74 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 30 Nov 2023 18:23:05 +0000
Subject: [PATCH 045/213] Delay totals port for creating sets as well.

---
 src/Processors/QueryPlan/CreatingSetsStep.cpp |   2 +
 src/QueryPipeline/Pipe.cpp                    | 211 ++++++++----------
 src/QueryPipeline/Pipe.h                      |   5 +
 src/QueryPipeline/QueryPipelineBuilder.cpp    |  14 +-
 4 files changed, 110 insertions(+), 122 deletions(-)

diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp
index 3e4dfb0c7d1..37f81ffd160 100644
--- a/src/Processors/QueryPlan/CreatingSetsStep.cpp
+++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp
@@ -111,6 +111,8 @@ QueryPipelineBuilderPtr CreatingSetsStep::updatePipeline(QueryPipelineBuilders p
     else
         delayed_pipeline = std::move(*pipelines.front());
 
+    delayed_pipeline.dropTotalsAndExtremes();
+
     QueryPipelineProcessorsCollector collector(*main_pipeline, this);
     main_pipeline->addPipelineBefore(std::move(delayed_pipeline));
     auto added_processors = collector.detachProcessors();
diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp
index b1c82d7a7e8..fd433638252 100644
--- a/src/QueryPipeline/Pipe.cpp
+++ b/src/QueryPipeline/Pipe.cpp
@@ -434,68 +434,130 @@ void Pipe::addTransform(ProcessorPtr transform)
 }
 
 void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes)
+{
+    addTransform(std::move(transform),
+        static_cast<InputPort *>(nullptr), static_cast<InputPort *>(nullptr),
+        totals, extremes);
+}
+
+void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes)
+{
+    addTransform(std::move(transform),
+        totals, extremes,
+        static_cast<OutputPort *>(nullptr), static_cast<OutputPort *>(nullptr));
+}
+
+void Pipe::addTransform(
+    ProcessorPtr transform,
+    InputPort * totals_in, InputPort * extremes_in,
+    OutputPort * totals_out, OutputPort * extremes_out)
 {
     if (output_ports.empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform to empty Pipe");
 
+    if (totals_in && !totals_port)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform consuming totals to Pipe because Pipe does not have totals");
+
+    if (extremes_in && !extremes_port)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform consuming extremes to Pipe because Pipe does not have extremes");
+
+    if (totals_out && !totals_in && totals_port)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform with totals to Pipe because it already has totals");
+
+    if (extremes_out && !extremes_in && extremes_port)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform with extremes to Pipe because it already has extremes");
+
     auto & inputs = transform->getInputs();
-    if (inputs.size() != output_ports.size())
+    auto & outputs = transform->getOutputs();
+
+    size_t expected_inputs = output_ports.size() + (totals_in ? 1 : 0) + (extremes_in ? 1 : 0);
+    if (inputs.size() != expected_inputs)
         throw Exception(
             ErrorCodes::LOGICAL_ERROR,
             "Cannot add transform {} to Pipe because it has {} input ports, but {} expected",
             transform->getName(),
             inputs.size(),
-            output_ports.size());
+            expected_inputs);
 
-    if (totals && totals_port)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform with totals to Pipe because it already has totals");
+    if (outputs.size() <= (totals_out ? 1 : 0) + (extremes_out ? 1 : 0))
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform {} to Pipes because it has no outputs",
+                        transform->getName());
 
-    if (extremes && extremes_port)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform with extremes to Pipe because it already has extremes");
+    bool found_totals_in = false;
+    bool found_extremes_in = false;
 
-    if (totals)
-        totals_port = totals;
-    if (extremes)
-        extremes_port = extremes;
-
-    size_t next_output = 0;
     for (auto & input : inputs)
     {
-        connect(*output_ports[next_output], input);
-        ++next_output;
+        if (&input == totals_in)
+            found_totals_in = true;
+        else if (&input == extremes_in)
+            found_extremes_in = true;
     }
 
-    auto & outputs = transform->getOutputs();
+    if (totals_in && !found_totals_in)
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Cannot add transform {} to Pipes because specified totals port does not belong to it",
+            transform->getName());
 
-    output_ports.clear();
-    output_ports.reserve(outputs.size());
+    if (extremes_in && !found_extremes_in)
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Cannot add transform {} to Pipes because specified extremes port does not belong to it",
+            transform->getName());
 
-    bool found_totals = false;
-    bool found_extremes = false;
+    bool found_totals_out = false;
+    bool found_extremes_out = false;
 
     for (auto & output : outputs)
     {
-        if (&output == totals)
-            found_totals = true;
-        else if (&output == extremes)
-            found_extremes = true;
-        else
-            output_ports.emplace_back(&output);
+        if (&output == totals_out)
+            found_totals_out = true;
+        else if (&output == extremes_out)
+            found_extremes_out = true;
     }
 
-    if (totals && !found_totals)
+    if (totals_out && !found_totals_out)
         throw Exception(ErrorCodes::LOGICAL_ERROR,
                         "Cannot add transform {} to Pipes because specified totals port does not belong to it",
                         transform->getName());
 
-    if (extremes && !found_extremes)
+    if (extremes_out && !found_extremes_out)
         throw Exception(ErrorCodes::LOGICAL_ERROR,
                         "Cannot add transform {} to Pipes because specified extremes port does not belong to it",
                         transform->getName());
 
-    if (output_ports.empty())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform {} to Pipes because it has no outputs",
-                        transform->getName());
+    if (totals_in)
+    {
+        connect(*totals_port, *totals_in);
+        totals_port = nullptr;
+    }
+    if (extremes_in)
+    {
+        connect(*extremes_port, *extremes_in);
+        extremes_port = nullptr;
+    }
+
+    totals_port = totals_out ? totals_out : totals_port;
+    extremes_port = extremes_out ? extremes_out : extremes_port;
+
+    size_t next_output = 0;
+    for (auto & input : inputs)
+    {
+        if (&input != totals_in && &input != extremes_in)
+        {
+            connect(*output_ports[next_output], input);
+            ++next_output;
+        }
+    }
+
+    output_ports.clear();
+    output_ports.reserve(outputs.size());
+    for (auto & output : outputs)
+    {
+        if (&output != totals_out && &output != extremes_out)
+            output_ports.emplace_back(&output);
+    }
 
     header = output_ports.front()->getHeader();
     for (size_t i = 1; i < output_ports.size(); ++i)
@@ -508,100 +570,11 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort
     if (extremes_port)
         assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes");
 
-    if (collected_processors)
-        collected_processors->emplace_back(transform);
-
     processors->emplace_back(std::move(transform));
 
-    max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size());
-}
-
-void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes)
-{
-    if (output_ports.empty())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform to empty Pipe");
-
-    auto & inputs = transform->getInputs();
-    size_t expected_inputs = output_ports.size() + (totals ? 1 : 0) + (extremes ? 1 : 0);
-    if (inputs.size() != expected_inputs)
-        throw Exception(
-            ErrorCodes::LOGICAL_ERROR,
-            "Cannot add transform {} to Pipe because it has {} input ports, but {} expected",
-            transform->getName(),
-            inputs.size(),
-            expected_inputs);
-
-    if (totals && !totals_port)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform consuming totals to Pipe because Pipe does not have totals");
-
-    if (extremes && !extremes_port)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform consuming extremes to Pipe because Pipe does not have extremes");
-
-    if (totals)
-    {
-        connect(*totals_port, *totals);
-        totals_port = nullptr;
-    }
-    if (extremes)
-    {
-        connect(*extremes_port, *extremes);
-        extremes_port = nullptr;
-    }
-
-    bool found_totals = false;
-    bool found_extremes = false;
-
-    size_t next_output = 0;
-    for (auto & input : inputs)
-    {
-        if (&input == totals)
-            found_totals = true;
-        else if (&input == extremes)
-            found_extremes = true;
-        else
-        {
-            connect(*output_ports[next_output], input);
-            ++next_output;
-        }
-    }
-
-    if (totals && !found_totals)
-        throw Exception(
-            ErrorCodes::LOGICAL_ERROR,
-            "Cannot add transform {} to Pipes because specified totals port does not belong to it",
-            transform->getName());
-
-    if (extremes && !found_extremes)
-        throw Exception(
-            ErrorCodes::LOGICAL_ERROR,
-            "Cannot add transform {} to Pipes because specified extremes port does not belong to it",
-            transform->getName());
-
-    auto & outputs = transform->getOutputs();
-    if (outputs.empty())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform {} to Pipes because it has no outputs", transform->getName());
-
-    output_ports.clear();
-    output_ports.reserve(outputs.size());
-
-    for (auto & output : outputs)
-        output_ports.emplace_back(&output);
-
-    header = output_ports.front()->getHeader();
-    for (size_t i = 1; i < output_ports.size(); ++i)
-        assertBlocksHaveEqualStructure(header, output_ports[i]->getHeader(), "Pipes");
-
-    if (totals_port)
-        assertBlocksHaveEqualStructure(header, totals_port->getHeader(), "Pipes");
-
-    if (extremes_port)
-        assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes");
-
     if (collected_processors)
         collected_processors->emplace_back(transform);
 
-    processors->emplace_back(std::move(transform));
-
     max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size());
 }
 
diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h
index 09931e38578..ec102605677 100644
--- a/src/QueryPipeline/Pipe.h
+++ b/src/QueryPipeline/Pipe.h
@@ -69,6 +69,11 @@ public:
     void addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes);
     void addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes);
 
+    void addTransform(
+        ProcessorPtr transform,
+        InputPort * totals_in, InputPort * extremes_in,
+        OutputPort * totals_out, OutputPort * extremes_out);
+
     enum class StreamType
     {
         Main = 0, /// Stream for query data. There may be several streams of this type.
diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp
index f13d1c56d7f..401987d46ba 100644
--- a/src/QueryPipeline/QueryPipelineBuilder.cpp
+++ b/src/QueryPipeline/QueryPipelineBuilder.cpp
@@ -602,7 +602,9 @@ void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for CreatingSets should have empty header. Got: {}",
                         pipeline.getHeader().dumpStructure());
 
-    IProcessor::PortNumbers delayed_streams(pipe.numOutputPorts());
+    bool has_totals = pipe.getTotalsPort();
+    bool has_extremes = pipe.getExtremesPort();
+    IProcessor::PortNumbers delayed_streams(pipe.numOutputPorts() + (has_totals ? 1 : 0) + (has_extremes ? 1 : 0));
     for (size_t i = 0; i < delayed_streams.size(); ++i)
         delayed_streams[i] = i;
 
@@ -613,8 +615,14 @@ void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline)
     pipes.emplace_back(QueryPipelineBuilder::getPipe(std::move(pipeline), resources));
     pipe = Pipe::unitePipes(std::move(pipes), collected_processors, true);
 
-    auto processor = std::make_shared<DelayedPortsProcessor>(getHeader(), pipe.numOutputPorts(), delayed_streams, true);
-    addTransform(std::move(processor));
+    auto processor = std::make_shared<DelayedPortsProcessor>(getHeader(), delayed_streams.size(), delayed_streams, true);
+    auto in = processor->getInputs().begin();
+    auto out = processor->getOutputs().begin();
+    InputPort * totals_in = has_totals ? &*(in++) : nullptr;
+    InputPort * extremes_in = has_extremes ? &*(in++) : nullptr;
+    OutputPort * totals_out = has_totals ? &*(out++) : nullptr;
+    OutputPort * extremes_out = has_extremes ? &*(out++) : nullptr;
+    pipe.addTransform(std::move(processor), totals_in, extremes_in, totals_out, extremes_out);
 }
 
 void QueryPipelineBuilder::setProcessListElement(QueryStatusPtr elem)

From 1751524dafc04f5bca14b8ea9434fd870ab6cc57 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Fri, 1 Dec 2023 03:34:54 +0100
Subject: [PATCH 046/213] Fix style

---
 src/Functions/GatherUtils/Sources.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h
index 329f71203bf..222f9f19168 100644
--- a/src/Functions/GatherUtils/Sources.h
+++ b/src/Functions/GatherUtils/Sources.h
@@ -323,7 +323,8 @@ struct StringSource
 
 /// Treats Enum values as Strings, modeled after StringSource
 template <typename EnumDataType>
-struct EnumSource {
+struct EnumSource
+{
     using Column = typename EnumDataType::ColumnType;
     using Slice = NumericArraySlice<UInt8>;
 
@@ -371,7 +372,8 @@ struct EnumSource {
         return data.size();
     }
 
-    Slice getWhole() const {
+    Slice getWhole() const
+    {
         std::string_view name = data_type.getNameForValue(data[row_num]).toView();
         return {reinterpret_cast<const UInt8 *>(name.data()), name.size()};
     }

From 31894bc9dff4df698acaa79aaa841db2840bbdcf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 1 Dec 2023 10:38:42 +0800
Subject: [PATCH 047/213] Update settings.md

---
 .../settings.md                               | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 2b73c4ec624..3c2604aed0f 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -472,6 +472,39 @@ The value 0 means that you can delete all tables without any restrictions.
 ``` xml
 <max_table_size_to_drop>0</max_table_size_to_drop>
 ```
+  
+
+## max\_database\_num\_to\_warn {#max-database-num-to-warn}  
+If the number of attached databases exceeds the specified value, clickhouse server will add warning message to `system.warnings` table.    
+Default value: 1000
+
+**Example**
+
+``` xml
+<max_database_num_to_warn>50</max_database_num_to_warn>
+```
+  
+## max\_table\_num\_to\_warn {#max-table-num-to-warn}   
+If the number of attached tables exceeds the specified value, clickhouse server will add warning message to `system.warnings` table.  
+Default value: 5000    
+
+**Example**
+
+``` xml
+<max_table_num_to_warn>400</max_table_num_to_warn>
+```
+
+
+## max\_part\_num\_to\_warn {#max-part-num-to-warn}  
+If the number of active parts exceeds the specified value, clickhouse server will add warning message to `system.warnings` table.  
+Default value: 100000  
+
+**Example**
+
+``` xml
+<max_part_num_to_warn>400</max_part_num_to_warn>
+```
+
 
 ## max_temporary_data_on_disk_size
 

From 2ae934ec05ea797a23ea2d3e8ee4c04db5c39827 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 1 Dec 2023 10:51:32 +0800
Subject: [PATCH 048/213] Update DatabasesCommon.cpp

---
 src/Databases/DatabasesCommon.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp
index e451f3a388e..6f3a6f547e4 100644
--- a/src/Databases/DatabasesCommon.cpp
+++ b/src/Databases/DatabasesCommon.cpp
@@ -16,7 +16,7 @@
 
 namespace CurrentMetrics
 {
-    const extern Metric AttachedTable;
+    extern const Metric AttachedTable;
 }
 
 

From 0b3cfcc8d4adf47bb3365aeb3bf33f9068144818 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 1 Dec 2023 11:23:31 +0800
Subject: [PATCH 049/213] Update DatabaseLazy.cpp

---
 src/Databases/DatabaseLazy.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp
index 75e4b19d628..caf14aa9b15 100644
--- a/src/Databases/DatabaseLazy.cpp
+++ b/src/Databases/DatabaseLazy.cpp
@@ -18,10 +18,12 @@
 
 namespace fs = std::filesystem;
 
+
 namespace CurrentMetrics {
     extern const Metric AttachedTable;
 }
 
+
 namespace DB
 {
 

From a30f63802eff4a7203a09baa50e51b2035c07adb Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 1 Dec 2023 13:26:30 +0000
Subject: [PATCH 050/213] Fix tests

---
 src/Processors/QueryPlan/CreatingSetsStep.cpp | 2 --
 src/QueryPipeline/Pipe.cpp                    | 4 ++--
 src/QueryPipeline/QueryPipelineBuilder.cpp    | 7 +++++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp
index 37f81ffd160..3e4dfb0c7d1 100644
--- a/src/Processors/QueryPlan/CreatingSetsStep.cpp
+++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp
@@ -111,8 +111,6 @@ QueryPipelineBuilderPtr CreatingSetsStep::updatePipeline(QueryPipelineBuilders p
     else
         delayed_pipeline = std::move(*pipelines.front());
 
-    delayed_pipeline.dropTotalsAndExtremes();
-
     QueryPipelineProcessorsCollector collector(*main_pipeline, this);
     main_pipeline->addPipelineBefore(std::move(delayed_pipeline));
     auto added_processors = collector.detachProcessors();
diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp
index fd433638252..8050c7cc671 100644
--- a/src/QueryPipeline/Pipe.cpp
+++ b/src/QueryPipeline/Pipe.cpp
@@ -570,11 +570,11 @@ void Pipe::addTransform(
     if (extremes_port)
         assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes");
 
-    processors->emplace_back(std::move(transform));
-
     if (collected_processors)
         collected_processors->emplace_back(transform);
 
+    processors->emplace_back(std::move(transform));
+
     max_parallel_streams = std::max<size_t>(max_parallel_streams, output_ports.size());
 }
 
diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp
index 401987d46ba..21eb07a5acd 100644
--- a/src/QueryPipeline/QueryPipelineBuilder.cpp
+++ b/src/QueryPipeline/QueryPipelineBuilder.cpp
@@ -602,9 +602,12 @@ void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for CreatingSets should have empty header. Got: {}",
                         pipeline.getHeader().dumpStructure());
 
+    pipeline.dropTotalsAndExtremes();
+
     bool has_totals = pipe.getTotalsPort();
     bool has_extremes = pipe.getExtremesPort();
-    IProcessor::PortNumbers delayed_streams(pipe.numOutputPorts() + (has_totals ? 1 : 0) + (has_extremes ? 1 : 0));
+    size_t num_extra_ports = (has_totals ? 1 : 0) + (has_extremes ? 1 : 0);
+    IProcessor::PortNumbers delayed_streams(pipe.numOutputPorts() + num_extra_ports);
     for (size_t i = 0; i < delayed_streams.size(); ++i)
         delayed_streams[i] = i;
 
@@ -615,7 +618,7 @@ void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline)
     pipes.emplace_back(QueryPipelineBuilder::getPipe(std::move(pipeline), resources));
     pipe = Pipe::unitePipes(std::move(pipes), collected_processors, true);
 
-    auto processor = std::make_shared<DelayedPortsProcessor>(getHeader(), delayed_streams.size(), delayed_streams, true);
+    auto processor = std::make_shared<DelayedPortsProcessor>(getHeader(), pipe.numOutputPorts() + num_extra_ports, delayed_streams, true);
     auto in = processor->getInputs().begin();
     auto out = processor->getOutputs().begin();
     InputPort * totals_in = has_totals ? &*(in++) : nullptr;

From 6d9e7f98a1c572e847f1acfa2ac765f85a2529f4 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 1 Dec 2023 13:28:51 +0000
Subject: [PATCH 051/213] Add a test

---
 tests/queries/0_stateless/02932_non_ready_set_stuck.reference | 2 ++
 tests/queries/0_stateless/02932_non_ready_set_stuck.sql       | 2 ++
 2 files changed, 4 insertions(+)
 create mode 100644 tests/queries/0_stateless/02932_non_ready_set_stuck.reference
 create mode 100644 tests/queries/0_stateless/02932_non_ready_set_stuck.sql

diff --git a/tests/queries/0_stateless/02932_non_ready_set_stuck.reference b/tests/queries/0_stateless/02932_non_ready_set_stuck.reference
new file mode 100644
index 00000000000..fc39e7c9b45
--- /dev/null
+++ b/tests/queries/0_stateless/02932_non_ready_set_stuck.reference
@@ -0,0 +1,2 @@
+
+0	0
diff --git a/tests/queries/0_stateless/02932_non_ready_set_stuck.sql b/tests/queries/0_stateless/02932_non_ready_set_stuck.sql
new file mode 100644
index 00000000000..c04f8f18751
--- /dev/null
+++ b/tests/queries/0_stateless/02932_non_ready_set_stuck.sql
@@ -0,0 +1,2 @@
+CREATE TABLE tab (item_id UInt64, price_sold Nullable(Float32), date Date) ENGINE = MergeTree ORDER BY item_id;
+SELECT * FROM (SELECT item_id FROM tab GROUP BY item_id WITH TOTALS ORDER BY '922337203.6854775806' IN (SELECT NULL)) AS l RIGHT JOIN (SELECT item_id FROM tab) AS r ON l.item_id = r.item_id WHERE NULL;

From a90458eb65f3743f262df1241915d3046b163a26 Mon Sep 17 00:00:00 2001
From: Ryan Jacobs <ryan.jacobs@tophap.com>
Date: Thu, 30 Nov 2023 23:46:42 -0800
Subject: [PATCH 052/213] Fix several issues regarding PostgreSQL `array_ndims`
 usage.

1. Properly quote column identifiers.
2. Handle empty tables that have array columns.
3. Throw a more user-friendly error when the column value is NULL
   when calling array_ndims(column)
4. Handle column value being the empty array {}
---
 .../fetchPostgreSQLTableStructure.cpp         | 48 +++++++++++++++++--
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
index dec3f1ffe5a..9890936007f 100644
--- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
+++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
@@ -158,6 +158,17 @@ static DataTypePtr convertPostgreSQLDataType(String & type, Fn<void()> auto && r
     return res;
 }
 
+/// Check if PostgreSQL relation is empty.
+/// postgres_table must be already quoted + schema-qualified.
+template<typename T>
+bool isTableEmpty(T &tx, const String & postgres_table) {
+  auto query = fmt::format(
+    "SELECT NOT EXISTS (SELECT * FROM {} LIMIT 1);",
+    postgres_table
+  );
+  pqxx::result result{tx.exec(query)};
+  return result[0][0].as<bool>();
+}
 
 template<typename T>
 PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList(
@@ -213,10 +224,39 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList(
         {
             const auto & name_and_type = columns[i];
 
-            /// All rows must contain the same number of dimensions, so limit 1 is ok. If number of dimensions in all rows is not the same -
-            /// such arrays are not able to be used as ClickHouse Array at all.
-            pqxx::result result{tx.exec(fmt::format("SELECT array_ndims({}) FROM {} LIMIT 1", name_and_type.name, postgres_table))};
-            auto dimensions = result[0][0].as<int>();
+            /// NOTE: If the relation is empty, then array_ndims returns NULL.
+            /// If this is the case, then assume dimensions=1. This covers most
+            /// use cases, but will be incorrect for empty tables with
+            /// multi-dimension arrays. The other solutions would be to drop
+            /// support for empty tables OR attempt fallback to a discovered
+            /// array_ndims CHECK constraint.
+            int dimensions;
+            if (isTableEmpty(tx, postgres_table)) {
+              dimensions = 1;
+            } else {
+              /// All rows must contain the same number of dimensions.
+              /// 1 is ok. If number of dimensions in all rows is not the same -
+              /// such arrays are not able to be used as ClickHouse Array at all.
+              ///
+              /// Assume dimensions=1 for empty arrays.
+              auto postgres_column = doubleQuoteString(name_and_type.name);
+              pqxx::result result{tx.exec(fmt::format(
+                  "SELECT {} IS NULL, COALESCE(array_ndims({}), 1) "
+                  "FROM {} LIMIT 1;",
+                  postgres_column, postgres_column, postgres_table
+              ))};
+
+              /// Nullable(Array) is not supported.
+              auto is_null = result[0][0].as<bool>();
+              if (is_null) {
+                throw Exception(
+                    ErrorCodes::BAD_ARGUMENTS,
+                    "PostgreSQL array cannot be NULL. Column: {}", postgres_column
+                );
+              }
+
+              dimensions = result[0][1].as<int>();
+            }
 
             /// It is always 1d array if it is in recheck.
             DataTypePtr type = assert_cast<const DataTypeArray *>(name_and_type.type.get())->getNestedType();

From 17aacda9e53bfa9359ad808c85b81bc299c86694 Mon Sep 17 00:00:00 2001
From: Ryan Jacobs <ryan.jacobs@tophap.com>
Date: Sat, 2 Dec 2023 06:44:34 -0800
Subject: [PATCH 053/213] fix: format code for stylecheck

---
 .../fetchPostgreSQLTableStructure.cpp         | 60 +++++++++----------
 1 file changed, 29 insertions(+), 31 deletions(-)

diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
index 9890936007f..0a35bc8c2b5 100644
--- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
+++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
@@ -160,14 +160,12 @@ static DataTypePtr convertPostgreSQLDataType(String & type, Fn<void()> auto && r
 
 /// Check if PostgreSQL relation is empty.
 /// postgres_table must be already quoted + schema-qualified.
-template<typename T>
-bool isTableEmpty(T &tx, const String & postgres_table) {
-  auto query = fmt::format(
-    "SELECT NOT EXISTS (SELECT * FROM {} LIMIT 1);",
-    postgres_table
-  );
-  pqxx::result result{tx.exec(query)};
-  return result[0][0].as<bool>();
+template <typename T>
+bool isTableEmpty(T & tx, const String & postgres_table)
+{
+    auto query = fmt::format("SELECT NOT EXISTS (SELECT * FROM {} LIMIT 1);", postgres_table);
+    pqxx::result result{tx.exec(query)};
+    return result[0][0].as<bool>();
 }
 
 template<typename T>
@@ -231,31 +229,31 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList(
             /// support for empty tables OR attempt fallback to a discovered
             /// array_ndims CHECK constraint.
             int dimensions;
-            if (isTableEmpty(tx, postgres_table)) {
-              dimensions = 1;
-            } else {
-              /// All rows must contain the same number of dimensions.
-              /// 1 is ok. If number of dimensions in all rows is not the same -
-              /// such arrays are not able to be used as ClickHouse Array at all.
-              ///
-              /// Assume dimensions=1 for empty arrays.
-              auto postgres_column = doubleQuoteString(name_and_type.name);
-              pqxx::result result{tx.exec(fmt::format(
-                  "SELECT {} IS NULL, COALESCE(array_ndims({}), 1) "
-                  "FROM {} LIMIT 1;",
-                  postgres_column, postgres_column, postgres_table
-              ))};
+            if (isTableEmpty(tx, postgres_table))
+            {
+                dimensions = 1;
+            }
+            else
+            {
+                /// All rows must contain the same number of dimensions.
+                /// 1 is ok. If number of dimensions in all rows is not the same -
+                /// such arrays are not able to be used as ClickHouse Array at all.
+                ///
+                /// Assume dimensions=1 for empty arrays.
+                auto postgres_column = doubleQuoteString(name_and_type.name);
+                pqxx::result result{tx.exec(fmt::format(
+                    "SELECT {} IS NULL, COALESCE(array_ndims({}), 1) "
+                    "FROM {} LIMIT 1;",
+                    postgres_column,
+                    postgres_column,
+                    postgres_table))};
 
-              /// Nullable(Array) is not supported.
-              auto is_null = result[0][0].as<bool>();
-              if (is_null) {
-                throw Exception(
-                    ErrorCodes::BAD_ARGUMENTS,
-                    "PostgreSQL array cannot be NULL. Column: {}", postgres_column
-                );
-              }
+                /// Nullable(Array) is not supported.
+                auto is_null = result[0][0].as<bool>();
+                if (is_null)
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL array cannot be NULL. Column: {}", postgres_column);
 
-              dimensions = result[0][1].as<int>();
+                dimensions = result[0][1].as<int>();
             }
 
             /// It is always 1d array if it is in recheck.

From 3eb68a67403944a8ea096448d16cb8b2be91eebf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Mon, 4 Dec 2023 10:08:46 +0000
Subject: [PATCH 054/213] optimize_using_constraints is broken

---
 tests/clickhouse-test | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index fe93864b202..006cc71bb4c 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -561,7 +561,6 @@ class SettingsRandomizer:
         "optimize_read_in_order": lambda: random.randint(0, 1),
         "optimize_or_like_chain": lambda: random.randint(1, 1),
         "optimize_substitute_columns": lambda: random.randint(1, 1),
-        "optimize_using_constraints": lambda: random.randint(1, 1),
         "enable_multiple_prewhere_read_steps": lambda: random.randint(0, 1),
         "read_in_order_two_level_merge_threshold": lambda: random.randint(0, 100),
         "optimize_aggregation_in_order": lambda: random.randint(0, 1),

From 7ce33b073783bd0eb204d737ad8bd7d6e535b5e8 Mon Sep 17 00:00:00 2001
From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com>
Date: Mon, 4 Dec 2023 17:57:33 -0400
Subject: [PATCH 055/213] Adds 'not available on cloud' to Distributed Table
 Engine.

---
 docs/en/engines/table-engines/special/distributed.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md
index 14431c4c43b..e0d952c65e4 100644
--- a/docs/en/engines/table-engines/special/distributed.md
+++ b/docs/en/engines/table-engines/special/distributed.md
@@ -4,6 +4,10 @@ sidebar_position: 10
 sidebar_label: Distributed
 ---
 
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
+
+<SelfManaged />
+
 # Distributed Table Engine
 
 Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers.

From e2eb47b2ec8a7a2377c2b1f11c93fcc5b1e52da7 Mon Sep 17 00:00:00 2001
From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com>
Date: Mon, 4 Dec 2023 17:58:19 -0400
Subject: [PATCH 056/213] Reverts last commit.

---
 docs/en/engines/table-engines/special/distributed.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md
index e0d952c65e4..14431c4c43b 100644
--- a/docs/en/engines/table-engines/special/distributed.md
+++ b/docs/en/engines/table-engines/special/distributed.md
@@ -4,10 +4,6 @@ sidebar_position: 10
 sidebar_label: Distributed
 ---
 
-import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
-
-<SelfManaged />
-
 # Distributed Table Engine
 
 Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers.

From 40062405fb3b8e4f881c0c7258001dce18d8b59a Mon Sep 17 00:00:00 2001
From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com>
Date: Mon, 4 Dec 2023 17:59:11 -0400
Subject: [PATCH 057/213] Adds 'not available on cloud' to Distributed Table
 Engine.

---
 docs/en/engines/table-engines/special/distributed.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md
index 14431c4c43b..e0d952c65e4 100644
--- a/docs/en/engines/table-engines/special/distributed.md
+++ b/docs/en/engines/table-engines/special/distributed.md
@@ -4,6 +4,10 @@ sidebar_position: 10
 sidebar_label: Distributed
 ---
 
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
+
+<SelfManaged />
+
 # Distributed Table Engine
 
 Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers.

From c6ca43b3418de24069c2544b0f0abe33b0147694 Mon Sep 17 00:00:00 2001
From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com>
Date: Mon, 4 Dec 2023 18:05:34 -0400
Subject: [PATCH 058/213] Moves self-hosted-only box under page title.

---
 docs/en/engines/table-engines/special/distributed.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md
index e0d952c65e4..c484d0803c3 100644
--- a/docs/en/engines/table-engines/special/distributed.md
+++ b/docs/en/engines/table-engines/special/distributed.md
@@ -1,15 +1,15 @@
 ---
-slug: /en/engines/table-engines/special/distributed
+sidebar_label: "Distributed"
 sidebar_position: 10
-sidebar_label: Distributed
+slug: /en/engines/table-engines/special/distributed
 ---
 
+# Distributed Table Engine
+
 import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
 
 <SelfManaged />
 
-# Distributed Table Engine
-
 Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers.
 Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.
 

From 5a51fdac954513a23422bb901d533fecc77bad0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Tue, 5 Dec 2023 11:33:40 +0800
Subject: [PATCH 059/213] modify

---
 src/Interpreters/Context.cpp                  | 1152 ++++++++++-------
 .../02931_max_num_to_warn.reference           |    6 +-
 .../0_stateless/02931_max_num_to_warn.sql     |    2 +-
 3 files changed, 673 insertions(+), 487 deletions(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index a2c94d59826..bc7fc8a0cb4 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1,150 +1,150 @@
+#include <filesystem>
 #include <map>
-#include <set>
-#include <optional>
 #include <memory>
-#include <Poco/UUID.h>
-#include <Poco/Util/Application.h>
-#include <Common/SensitiveDataMasker.h>
-#include <Common/Macros.h>
-#include <Common/EventNotifier.h>
-#include <Common/Stopwatch.h>
-#include <Common/formatReadable.h>
-#include <Common/Throttler.h>
-#include <Common/thread_local_rng.h>
-#include <Common/FieldVisitorToString.h>
-#include <Common/getMultipleKeysFromConfig.h>
-#include <Common/callOnce.h>
-#include <Common/SharedLockGuard.h>
-#include <Coordination/KeeperDispatcher.h>
-#include <Core/BackgroundSchedulePool.h>
-#include <Formats/FormatFactory.h>
-#include <Databases/IDatabase.h>
-#include <Server/ServerType.h>
-#include <Storages/MarkCache.h>
-#include <Storages/MergeTree/MergeList.h>
-#include <Storages/MergeTree/MovesList.h>
-#include <Storages/MergeTree/ReplicatedFetchList.h>
-#include <Storages/MergeTree/MergeTreeData.h>
-#include <Storages/MergeTree/MergeTreeSettings.h>
-#include <Storages/CompressionCodecSelector.h>
-#include <Storages/StorageS3Settings.h>
-#include <Disks/DiskLocal.h>
-#include <Disks/ObjectStorages/DiskObjectStorage.h>
-#include <Disks/ObjectStorages/IObjectStorage.h>
-#include <Disks/StoragePolicy.h>
-#include <IO/SynchronousReader.h>
-#include <TableFunctions/TableFunctionFactory.h>
-#include <Interpreters/ActionLocksManager.h>
-#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
-#include <Interpreters/TemporaryDataOnDisk.h>
-#include <Interpreters/Cache/QueryCache.h>
-#include <Interpreters/Cache/FileCacheFactory.h>
-#include <Interpreters/SessionTracker.h>
-#include <Core/ServerSettings.h>
-#include <Interpreters/PreparedSets.h>
-#include <Core/Settings.h>
-#include <Core/SettingsQuirks.h>
+#include <optional>
+#include <set>
 #include <Access/AccessControl.h>
 #include <Access/ContextAccess.h>
 #include <Access/EnabledRolesInfo.h>
 #include <Access/EnabledRowPolicies.h>
-#include <Access/QuotaUsage.h>
-#include <Access/User.h>
-#include <Access/SettingsProfile.h>
-#include <Access/SettingsProfilesInfo.h>
-#include <Access/SettingsConstraintsAndProfileIDs.h>
 #include <Access/ExternalAuthenticators.h>
 #include <Access/GSSAcceptor.h>
-#include <IO/ResourceManagerFactory.h>
+#include <Access/QuotaUsage.h>
+#include <Access/SettingsConstraintsAndProfileIDs.h>
+#include <Access/SettingsProfile.h>
+#include <Access/SettingsProfilesInfo.h>
+#include <Access/User.h>
 #include <Backups/BackupsWorker.h>
+#include <Coordination/KeeperDispatcher.h>
+#include <Core/BackgroundSchedulePool.h>
+#include <Core/ServerSettings.h>
+#include <Core/Settings.h>
+#include <Core/SettingsQuirks.h>
+#include <Databases/IDatabase.h>
 #include <Dictionaries/Embedded/GeoDictionariesLoader.h>
-#include <Interpreters/EmbeddedDictionaries.h>
-#include <Interpreters/ExternalDictionariesLoader.h>
+#include <Disks/DiskLocal.h>
+#include <Disks/ObjectStorages/DiskObjectStorage.h>
+#include <Disks/ObjectStorages/IObjectStorage.h>
+#include <Disks/StoragePolicy.h>
+#include <Formats/FormatFactory.h>
 #include <Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.h>
 #include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
 #include <Functions/UserDefined/createUserDefinedSQLObjectsLoader.h>
-#include <Interpreters/ProcessList.h>
-#include <Interpreters/InterserverCredentials.h>
-#include <Interpreters/Cluster.h>
-#include <Interpreters/InterserverIOHandler.h>
-#include <Interpreters/Context.h>
-#include <Interpreters/DDLWorker.h>
-#include <Interpreters/DDLTask.h>
-#include <Interpreters/Session.h>
-#include <Interpreters/TraceCollector.h>
-#include <IO/ReadBufferFromFile.h>
-#include <IO/UncompressedCache.h>
 #include <IO/MMappedFileCache.h>
+#include <IO/ReadBufferFromFile.h>
+#include <IO/ResourceManagerFactory.h>
+#include <IO/SynchronousReader.h>
+#include <IO/UncompressedCache.h>
 #include <IO/WriteSettings.h>
-#include <Parsers/ASTCreateQuery.h>
+#include <Interpreters/ActionLocksManager.h>
+#include <Interpreters/AsynchronousInsertQueue.h>
+#include <Interpreters/Cache/FileCacheFactory.h>
+#include <Interpreters/Cache/QueryCache.h>
+#include <Interpreters/Cluster.h>
+#include <Interpreters/ClusterDiscovery.h>
+#include <Interpreters/Context.h>
+#include <Interpreters/DDLTask.h>
+#include <Interpreters/DDLWorker.h>
+#include <Interpreters/DatabaseCatalog.h>
+#include <Interpreters/EmbeddedDictionaries.h>
+#include <Interpreters/ExternalDictionariesLoader.h>
+#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
+#include <Interpreters/InterpreterSelectWithUnionQuery.h>
+#include <Interpreters/InterserverCredentials.h>
+#include <Interpreters/InterserverIOHandler.h>
+#include <Interpreters/JIT/CompiledExpressionCache.h>
+#include <Interpreters/Lemmatizers.h>
+#include <Interpreters/PreparedSets.h>
+#include <Interpreters/ProcessList.h>
+#include <Interpreters/Session.h>
+#include <Interpreters/SessionTracker.h>
+#include <Interpreters/SynonymsExtensions.h>
+#include <Interpreters/TemporaryDataOnDisk.h>
+#include <Interpreters/TraceCollector.h>
+#include <Interpreters/TransactionLog.h>
 #include <Parsers/ASTAsterisk.h>
+#include <Parsers/ASTCreateQuery.h>
+#include <Parsers/ASTFunction.h>
 #include <Parsers/ASTIdentifier.h>
-#include <Common/StackTrace.h>
+#include <Parsers/ASTSelectWithUnionQuery.h>
+#include <Parsers/FunctionParameterValuesVisitor.h>
+#include <Server/ServerType.h>
+#include <Storages/CompressionCodecSelector.h>
+#include <Storages/MarkCache.h>
+#include <Storages/MergeTree/BackgroundJobsAssignee.h>
+#include <Storages/MergeTree/MergeList.h>
+#include <Storages/MergeTree/MergeTreeData.h>
+#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
+#include <Storages/MergeTree/MergeTreeSettings.h>
+#include <Storages/MergeTree/MovesList.h>
+#include <Storages/MergeTree/ReplicatedFetchList.h>
+#include <Storages/StorageS3Settings.h>
+#include <Storages/StorageView.h>
+#include <TableFunctions/TableFunctionFactory.h>
+#include <re2/re2.h>
+#include <Poco/UUID.h>
+#include <Poco/Util/Application.h>
+#include <Common/Config/AbstractConfigurationComparison.h>
 #include <Common/Config/ConfigHelper.h>
 #include <Common/Config/ConfigProcessor.h>
-#include <Common/Config/AbstractConfigurationComparison.h>
-#include <Common/ZooKeeper/ZooKeeper.h>
-#include <Common/ShellCommand.h>
-#include <Common/logger_useful.h>
-#include <Common/RemoteHostFilter.h>
+#include <Common/EventNotifier.h>
+#include <Common/FieldVisitorToString.h>
 #include <Common/HTTPHeaderFilter.h>
-#include <Interpreters/AsynchronousInsertQueue.h>
-#include <Interpreters/DatabaseCatalog.h>
-#include <Interpreters/JIT/CompiledExpressionCache.h>
-#include <Storages/MergeTree/BackgroundJobsAssignee.h>
-#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
-#include <Interpreters/SynonymsExtensions.h>
-#include <Interpreters/Lemmatizers.h>
-#include <Interpreters/ClusterDiscovery.h>
-#include <Interpreters/TransactionLog.h>
-#include <filesystem>
-#include <re2/re2.h>
-#include <Storages/StorageView.h>
-#include <Parsers/ASTFunction.h>
-#include <Parsers/FunctionParameterValuesVisitor.h>
-#include <Parsers/ASTSelectWithUnionQuery.h>
-#include <Interpreters/InterpreterSelectWithUnionQuery.h>
+#include <Common/Macros.h>
+#include <Common/RemoteHostFilter.h>
+#include <Common/SensitiveDataMasker.h>
+#include <Common/SharedLockGuard.h>
+#include <Common/ShellCommand.h>
+#include <Common/StackTrace.h>
+#include <Common/Stopwatch.h>
+#include <Common/Throttler.h>
+#include <Common/ZooKeeper/ZooKeeper.h>
+#include <Common/callOnce.h>
+#include <Common/formatReadable.h>
+#include <Common/getMultipleKeysFromConfig.h>
+#include <Common/logger_useful.h>
+#include <Common/thread_local_rng.h>
 
 
 namespace fs = std::filesystem;
 
 namespace ProfileEvents
 {
-    extern const Event ContextLock;
-    extern const Event ContextLockWaitMicroseconds;
+extern const Event ContextLock;
+extern const Event ContextLockWaitMicroseconds;
 }
 
 namespace CurrentMetrics
 {
-    extern const Metric ContextLockWait;
-    extern const Metric BackgroundMovePoolTask;
-    extern const Metric BackgroundMovePoolSize;
-    extern const Metric BackgroundSchedulePoolTask;
-    extern const Metric BackgroundSchedulePoolSize;
-    extern const Metric BackgroundBufferFlushSchedulePoolTask;
-    extern const Metric BackgroundBufferFlushSchedulePoolSize;
-    extern const Metric BackgroundDistributedSchedulePoolTask;
-    extern const Metric BackgroundDistributedSchedulePoolSize;
-    extern const Metric BackgroundMessageBrokerSchedulePoolTask;
-    extern const Metric BackgroundMessageBrokerSchedulePoolSize;
-    extern const Metric BackgroundMergesAndMutationsPoolTask;
-    extern const Metric BackgroundMergesAndMutationsPoolSize;
-    extern const Metric BackgroundFetchesPoolTask;
-    extern const Metric BackgroundFetchesPoolSize;
-    extern const Metric BackgroundCommonPoolTask;
-    extern const Metric BackgroundCommonPoolSize;
-    extern const Metric MarksLoaderThreads;
-    extern const Metric MarksLoaderThreadsActive;
-    extern const Metric MarksLoaderThreadsScheduled;
-    extern const Metric IOPrefetchThreads;
-    extern const Metric IOPrefetchThreadsActive;
-    extern const Metric IOPrefetchThreadsScheduled;
-    extern const Metric IOWriterThreads;
-    extern const Metric IOWriterThreadsActive;
-    extern const Metric IOWriterThreadsScheduled;
-    extern const Metric AttachedTable;
-    extern const Metric AttachedDatabase;
-    extern const Metric PartsActive;
+extern const Metric ContextLockWait;
+extern const Metric BackgroundMovePoolTask;
+extern const Metric BackgroundMovePoolSize;
+extern const Metric BackgroundSchedulePoolTask;
+extern const Metric BackgroundSchedulePoolSize;
+extern const Metric BackgroundBufferFlushSchedulePoolTask;
+extern const Metric BackgroundBufferFlushSchedulePoolSize;
+extern const Metric BackgroundDistributedSchedulePoolTask;
+extern const Metric BackgroundDistributedSchedulePoolSize;
+extern const Metric BackgroundMessageBrokerSchedulePoolTask;
+extern const Metric BackgroundMessageBrokerSchedulePoolSize;
+extern const Metric BackgroundMergesAndMutationsPoolTask;
+extern const Metric BackgroundMergesAndMutationsPoolSize;
+extern const Metric BackgroundFetchesPoolTask;
+extern const Metric BackgroundFetchesPoolSize;
+extern const Metric BackgroundCommonPoolTask;
+extern const Metric BackgroundCommonPoolSize;
+extern const Metric MarksLoaderThreads;
+extern const Metric MarksLoaderThreadsActive;
+extern const Metric MarksLoaderThreadsScheduled;
+extern const Metric IOPrefetchThreads;
+extern const Metric IOPrefetchThreadsActive;
+extern const Metric IOPrefetchThreadsScheduled;
+extern const Metric IOWriterThreads;
+extern const Metric IOWriterThreadsActive;
+extern const Metric IOWriterThreadsScheduled;
+extern const Metric AttachedTable;
+extern const Metric AttachedDatabase;
+extern const Metric PartsActive;
 }
 
 
@@ -153,32 +153,33 @@ namespace DB
 
 namespace ErrorCodes
 {
-    extern const int BAD_ARGUMENTS;
-    extern const int UNKNOWN_DATABASE;
-    extern const int UNKNOWN_TABLE;
-    extern const int TABLE_ALREADY_EXISTS;
-    extern const int THERE_IS_NO_SESSION;
-    extern const int THERE_IS_NO_QUERY;
-    extern const int NO_ELEMENTS_IN_CONFIG;
-    extern const int TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT;
-    extern const int LOGICAL_ERROR;
-    extern const int INVALID_SETTING_VALUE;
-    extern const int UNKNOWN_READ_METHOD;
-    extern const int NOT_IMPLEMENTED;
-    extern const int UNKNOWN_FUNCTION;
-    extern const int ILLEGAL_COLUMN;
-    extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
-    extern const int CLUSTER_DOESNT_EXIST;
+extern const int BAD_ARGUMENTS;
+extern const int UNKNOWN_DATABASE;
+extern const int UNKNOWN_TABLE;
+extern const int TABLE_ALREADY_EXISTS;
+extern const int THERE_IS_NO_SESSION;
+extern const int THERE_IS_NO_QUERY;
+extern const int NO_ELEMENTS_IN_CONFIG;
+extern const int TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT;
+extern const int LOGICAL_ERROR;
+extern const int INVALID_SETTING_VALUE;
+extern const int UNKNOWN_READ_METHOD;
+extern const int NOT_IMPLEMENTED;
+extern const int UNKNOWN_FUNCTION;
+extern const int ILLEGAL_COLUMN;
+extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
+extern const int CLUSTER_DOESNT_EXIST;
 }
 
-#define SHUTDOWN(log, desc, ptr, method) do             \
-{                                                       \
-    if (ptr)                                            \
-    {                                                   \
-        LOG_DEBUG(log, "Shutting down " desc);          \
-        (ptr)->method;                                  \
-    }                                                   \
-} while (false)                                         \
+#define SHUTDOWN(log, desc, ptr, method) \
+    do \
+    { \
+        if (ptr) \
+        { \
+            LOG_DEBUG(log, "Shutting down " desc); \
+            (ptr)->method; \
+        } \
+    } while (false)
 
 /** Set of known objects (environment), that could be used in query.
   * Shared (global) part. Order of members (especially, order of destruction) is very important.
@@ -200,8 +201,8 @@ struct ContextSharedPart : boost::noncopyable
     /// Separate mutex for re-initialization of zookeeper session. This operation could take a long time and must not interfere with another operations.
     mutable std::mutex zookeeper_mutex;
 
-    mutable zkutil::ZooKeeperPtr zookeeper TSA_GUARDED_BY(zookeeper_mutex);                 /// Client for ZooKeeper.
-    ConfigurationPtr zookeeper_config TSA_GUARDED_BY(zookeeper_mutex);                      /// Stores zookeeper configs
+    mutable zkutil::ZooKeeperPtr zookeeper TSA_GUARDED_BY(zookeeper_mutex); /// Client for ZooKeeper.
+    ConfigurationPtr zookeeper_config TSA_GUARDED_BY(zookeeper_mutex); /// Stores zookeeper configs
 
     ConfigurationPtr sensitive_data_masker_config;
 
@@ -210,37 +211,43 @@ struct ContextSharedPart : boost::noncopyable
     mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
 #endif
     mutable std::mutex auxiliary_zookeepers_mutex;
-    mutable std::map<String, zkutil::ZooKeeperPtr> auxiliary_zookeepers TSA_GUARDED_BY(auxiliary_zookeepers_mutex);    /// Map for auxiliary ZooKeeper clients.
-    ConfigurationPtr auxiliary_zookeepers_config TSA_GUARDED_BY(auxiliary_zookeepers_mutex);           /// Stores auxiliary zookeepers configs
+    mutable std::map<String, zkutil::ZooKeeperPtr>
+        auxiliary_zookeepers TSA_GUARDED_BY(auxiliary_zookeepers_mutex); /// Map for auxiliary ZooKeeper clients.
+    ConfigurationPtr auxiliary_zookeepers_config TSA_GUARDED_BY(auxiliary_zookeepers_mutex); /// Stores auxiliary zookeepers configs
 
     /// No lock required for interserver_io_host, interserver_io_port, interserver_scheme modified only during initialization
-    String interserver_io_host;                             /// The host name by which this server is available for other servers.
-    UInt16 interserver_io_port = 0;                         /// and port.
-    String interserver_scheme;                              /// http or https
+    String interserver_io_host; /// The host name by which this server is available for other servers.
+    UInt16 interserver_io_port = 0; /// and port.
+    String interserver_scheme; /// http or https
     MultiVersion<InterserverCredentials> interserver_io_credentials;
 
-    String path TSA_GUARDED_BY(mutex);                       /// Path to the data directory, with a slash at the end.
-    String flags_path TSA_GUARDED_BY(mutex);                 /// Path to the directory with some control flags for server maintenance.
-    String user_files_path TSA_GUARDED_BY(mutex);            /// Path to the directory with user provided files, usable by 'file' table function.
-    String dictionaries_lib_path TSA_GUARDED_BY(mutex);      /// Path to the directory with user provided binaries and libraries for external dictionaries.
-    String user_scripts_path TSA_GUARDED_BY(mutex);          /// Path to the directory with user provided scripts.
-    String filesystem_caches_path TSA_GUARDED_BY(mutex);     /// Path to the directory with filesystem caches.
-    ConfigurationPtr config TSA_GUARDED_BY(mutex);           /// Global configuration settings.
-    String tmp_path TSA_GUARDED_BY(mutex);                   /// Path to the temporary files that occur when processing the request.
+    String path TSA_GUARDED_BY(mutex); /// Path to the data directory, with a slash at the end.
+    String flags_path TSA_GUARDED_BY(mutex); /// Path to the directory with some control flags for server maintenance.
+    String user_files_path TSA_GUARDED_BY(mutex); /// Path to the directory with user provided files, usable by 'file' table function.
+    String dictionaries_lib_path
+        TSA_GUARDED_BY(mutex); /// Path to the directory with user provided binaries and libraries for external dictionaries.
+    String user_scripts_path TSA_GUARDED_BY(mutex); /// Path to the directory with user provided scripts.
+    String filesystem_caches_path TSA_GUARDED_BY(mutex); /// Path to the directory with filesystem caches.
+    ConfigurationPtr config TSA_GUARDED_BY(mutex); /// Global configuration settings.
+    String tmp_path TSA_GUARDED_BY(mutex); /// Path to the temporary files that occur when processing the request.
 
     /// All temporary files that occur when processing the requests accounted here.
     /// Child scopes for more fine-grained accounting are created per user/query/etc.
     /// Initialized once during server startup.
     TemporaryDataOnDiskScopePtr root_temp_data_on_disk TSA_GUARDED_BY(mutex);
 
-    mutable std::unique_ptr<EmbeddedDictionaries> embedded_dictionaries TSA_GUARDED_BY(embedded_dictionaries_mutex);    /// Metrica's dictionaries. Have lazy initialization.
+    mutable std::unique_ptr<EmbeddedDictionaries>
+        embedded_dictionaries TSA_GUARDED_BY(embedded_dictionaries_mutex); /// Metrica's dictionaries. Have lazy initialization.
     mutable std::unique_ptr<ExternalDictionariesLoader> external_dictionaries_loader TSA_GUARDED_BY(external_dictionaries_mutex);
 
     ExternalLoaderXMLConfigRepository * external_dictionaries_config_repository TSA_GUARDED_BY(external_dictionaries_mutex) = nullptr;
     scope_guard dictionaries_xmls TSA_GUARDED_BY(external_dictionaries_mutex);
 
-    mutable std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader> external_user_defined_executable_functions_loader TSA_GUARDED_BY(external_user_defined_executable_functions_mutex);
-    ExternalLoaderXMLConfigRepository * user_defined_executable_functions_config_repository TSA_GUARDED_BY(external_user_defined_executable_functions_mutex) = nullptr;
+    mutable std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader>
+        external_user_defined_executable_functions_loader TSA_GUARDED_BY(external_user_defined_executable_functions_mutex);
+    ExternalLoaderXMLConfigRepository *
+        user_defined_executable_functions_config_repository TSA_GUARDED_BY(external_user_defined_executable_functions_mutex)
+        = nullptr;
     scope_guard user_defined_executable_functions_xmls TSA_GUARDED_BY(external_user_defined_executable_functions_mutex);
 
     mutable OnceFlag user_defined_sql_objects_loader_initialized;
@@ -258,39 +265,44 @@ struct ContextSharedPart : boost::noncopyable
     std::optional<BackupsWorker> backups_worker;
 
     /// No lock required for default_profile_name, system_profile_name, buffer_profile_name modified only during initialization
-    String default_profile_name;                                /// Default profile name used for default values.
-    String system_profile_name;                                 /// Profile used by system processes
-    String buffer_profile_name;                                 /// Profile used by Buffer engine for flushing to the underlying
+    String default_profile_name; /// Default profile name used for default values.
+    String system_profile_name; /// Profile used by system processes
+    String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying
     std::unique_ptr<AccessControl> access_control TSA_GUARDED_BY(mutex);
     mutable OnceFlag resource_manager_initialized;
     mutable ResourceManagerPtr resource_manager;
-    mutable UncompressedCachePtr uncompressed_cache TSA_GUARDED_BY(mutex);            /// The cache of decompressed blocks.
-    mutable MarkCachePtr mark_cache TSA_GUARDED_BY(mutex);                            /// Cache of marks in compressed files.
+    mutable UncompressedCachePtr uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks.
+    mutable MarkCachePtr mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files.
     mutable OnceFlag load_marks_threadpool_initialized;
-    mutable std::unique_ptr<ThreadPool> load_marks_threadpool;  /// Threadpool for loading marks cache.
+    mutable std::unique_ptr<ThreadPool> load_marks_threadpool; /// Threadpool for loading marks cache.
     mutable OnceFlag prefetch_threadpool_initialized;
-    mutable std::unique_ptr<ThreadPool> prefetch_threadpool;    /// Threadpool for loading marks cache.
-    mutable UncompressedCachePtr index_uncompressed_cache TSA_GUARDED_BY(mutex);      /// The cache of decompressed blocks for MergeTree indices.
-    mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex);                          /// Cache of query results.
-    mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex);                      /// Cache of marks in compressed files of MergeTree indices.
-    mutable MMappedFileCachePtr mmap_cache TSA_GUARDED_BY(mutex);                     /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads.
-    ProcessList process_list;                                   /// Executing queries at the moment.
+    mutable std::unique_ptr<ThreadPool> prefetch_threadpool; /// Threadpool for loading marks cache.
+    mutable UncompressedCachePtr index_uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks for MergeTree indices.
+    mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex); /// Cache of query results.
+    mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices.
+    mutable MMappedFileCachePtr mmap_cache
+        TSA_GUARDED_BY(mutex); /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads.
+    ProcessList process_list; /// Executing queries at the moment.
     SessionTracker session_tracker;
     GlobalOvercommitTracker global_overcommit_tracker;
-    MergeList merge_list;                                       /// The list of executable merge (for (Replicated)?MergeTree)
-    MovesList moves_list;                                       /// The list of executing moves (for (Replicated)?MergeTree)
+    MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree)
+    MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree)
     ReplicatedFetchList replicated_fetch_list;
-    ConfigurationPtr users_config TSA_GUARDED_BY(mutex);                              /// Config with the users, profiles and quotas sections.
-    InterserverIOHandler interserver_io_handler;                /// Handler for interserver communication.
+    ConfigurationPtr users_config TSA_GUARDED_BY(mutex); /// Config with the users, profiles and quotas sections.
+    InterserverIOHandler interserver_io_handler; /// Handler for interserver communication.
 
     OnceFlag buffer_flush_schedule_pool_initialized;
-    mutable std::unique_ptr<BackgroundSchedulePool> buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables.
+    mutable std::unique_ptr<BackgroundSchedulePool>
+        buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables.
     OnceFlag schedule_pool_initialized;
-    mutable std::unique_ptr<BackgroundSchedulePool> schedule_pool;    /// A thread pool that can run different jobs in background (used in replicated tables)
+    mutable std::unique_ptr<BackgroundSchedulePool>
+        schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables)
     OnceFlag distributed_schedule_pool_initialized;
-    mutable std::unique_ptr<BackgroundSchedulePool> distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends)
+    mutable std::unique_ptr<BackgroundSchedulePool>
+        distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends)
     OnceFlag message_broker_schedule_pool_initialized;
-    mutable std::unique_ptr<BackgroundSchedulePool> message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka)
+    mutable std::unique_ptr<BackgroundSchedulePool>
+        message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka)
 
     mutable OnceFlag readers_initialized;
     mutable std::unique_ptr<IAsynchronousReader> asynchronous_remote_fs_reader;
@@ -300,19 +312,19 @@ struct ContextSharedPart : boost::noncopyable
     mutable OnceFlag threadpool_writer_initialized;
     mutable std::unique_ptr<ThreadPool> threadpool_writer;
 
-    mutable ThrottlerPtr replicated_fetches_throttler;      /// A server-wide throttler for replicated fetches
-    mutable ThrottlerPtr replicated_sends_throttler;        /// A server-wide throttler for replicated sends
+    mutable ThrottlerPtr replicated_fetches_throttler; /// A server-wide throttler for replicated fetches
+    mutable ThrottlerPtr replicated_sends_throttler; /// A server-wide throttler for replicated sends
 
-    mutable ThrottlerPtr remote_read_throttler;             /// A server-wide throttler for remote IO reads
-    mutable ThrottlerPtr remote_write_throttler;            /// A server-wide throttler for remote IO writes
+    mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads
+    mutable ThrottlerPtr remote_write_throttler; /// A server-wide throttler for remote IO writes
 
-    mutable ThrottlerPtr local_read_throttler;              /// A server-wide throttler for local IO reads
-    mutable ThrottlerPtr local_write_throttler;             /// A server-wide throttler for local IO writes
+    mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads
+    mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes
 
-    mutable ThrottlerPtr backups_server_throttler;          /// A server-wide throttler for BACKUPs
+    mutable ThrottlerPtr backups_server_throttler; /// A server-wide throttler for BACKUPs
 
-    MultiVersion<Macros> macros;                            /// Substitutions extracted from config.
-    std::unique_ptr<DDLWorker> ddl_worker TSA_GUARDED_BY(mutex);                  /// Process ddl commands from zk.
+    MultiVersion<Macros> macros; /// Substitutions extracted from config.
+    std::unique_ptr<DDLWorker> ddl_worker TSA_GUARDED_BY(mutex); /// Process ddl commands from zk.
     /// Rules for selecting the compression settings, depending on the size of the part.
     mutable std::unique_ptr<CompressionCodecSelector> compression_codec_selector TSA_GUARDED_BY(mutex);
     /// Storage disk chooser for MergeTree engines
@@ -322,22 +334,21 @@ struct ContextSharedPart : boost::noncopyable
 
     ServerSettings server_settings;
 
-    std::optional<MergeTreeSettings> merge_tree_settings TSA_GUARDED_BY(mutex);   /// Settings of MergeTree* engines.
-    std::optional<MergeTreeSettings> replicated_merge_tree_settings TSA_GUARDED_BY(mutex);   /// Settings of ReplicatedMergeTree* engines.
+    std::optional<MergeTreeSettings> merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of MergeTree* engines.
+    std::optional<MergeTreeSettings> replicated_merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of ReplicatedMergeTree* engines.
     std::atomic_size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default)
     std::atomic_size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default)
-                                                                   ///     std::atomic_size_t max_table_size_to_warn;
     std::atomic_size_t max_database_num_to_warn = 1000lu;
     std::atomic_size_t max_table_num_to_warn = 5000lu;
     std::atomic_size_t max_part_num_to_warn = 100000lu;
     /// No lock required for format_schema_path modified only during initialization
-    String format_schema_path;                              /// Path to a directory that contains schema files used by input formats.
+    String format_schema_path; /// Path to a directory that contains schema files used by input formats.
     mutable OnceFlag action_locks_manager_initialized;
-    ActionLocksManagerPtr action_locks_manager;             /// Set of storages' action lockers
+    ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers
     OnceFlag system_logs_initialized;
-    std::unique_ptr<SystemLogs> system_logs TSA_GUARDED_BY(mutex);                /// Used to log queries and operations on parts
-    std::optional<StorageS3Settings> storage_s3_settings TSA_GUARDED_BY(mutex);   /// Settings of S3 storage
-    std::vector<String> warnings TSA_GUARDED_BY(mutex);                           /// Store warning messages about server configuration.
+    std::unique_ptr<SystemLogs> system_logs TSA_GUARDED_BY(mutex); /// Used to log queries and operations on parts
+    std::optional<StorageS3Settings> storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage
+    std::vector<String> warnings TSA_GUARDED_BY(mutex); /// Store warning messages about server configuration.
 
     /// Background executors for *MergeTree tables
     /// Has background executors for MergeTree tables been initialized?
@@ -348,17 +359,17 @@ struct ContextSharedPart : boost::noncopyable
     OrdinaryBackgroundExecutorPtr fetch_executor TSA_GUARDED_BY(background_executors_mutex);
     OrdinaryBackgroundExecutorPtr common_executor TSA_GUARDED_BY(background_executors_mutex);
 
-    RemoteHostFilter remote_host_filter TSA_GUARDED_BY(mutex);                    /// Allowed URL from config.xml
-    HTTPHeaderFilter http_header_filter TSA_GUARDED_BY(mutex);                    /// Forbidden HTTP headers from config.xml
+    RemoteHostFilter remote_host_filter TSA_GUARDED_BY(mutex); /// Allowed URL from config.xml
+    HTTPHeaderFilter http_header_filter TSA_GUARDED_BY(mutex); /// Forbidden HTTP headers from config.xml
 
     /// No lock required for trace_collector modified only during initialization
-    std::optional<TraceCollector> trace_collector;          /// Thread collecting traces from threads executing queries
+    std::optional<TraceCollector> trace_collector; /// Thread collecting traces from threads executing queries
 
     /// Clusters for distributed tables
     /// Initialized on demand (on distributed storages initialization) since Settings should be initialized
-    mutable std::mutex clusters_mutex;                       /// Guards clusters, clusters_config and cluster_discovery
+    mutable std::mutex clusters_mutex; /// Guards clusters, clusters_config and cluster_discovery
     std::shared_ptr<Clusters> clusters TSA_GUARDED_BY(clusters_mutex);
-    ConfigurationPtr clusters_config TSA_GUARDED_BY(clusters_mutex);                        /// Stores updated configs
+    ConfigurationPtr clusters_config TSA_GUARDED_BY(clusters_mutex); /// Stores updated configs
     std::unique_ptr<ClusterDiscovery> cluster_discovery TSA_GUARDED_BY(clusters_mutex);
 
     /// No lock required for async_insert_queue modified only during initialization
@@ -384,9 +395,7 @@ struct ContextSharedPart : boost::noncopyable
     bool is_server_completely_started TSA_GUARDED_BY(mutex) = false;
 
     ContextSharedPart()
-        : access_control(std::make_unique<AccessControl>())
-        , global_overcommit_tracker(&process_list)
-        , macros(std::make_unique<Macros>())
+        : access_control(std::make_unique<AccessControl>()), global_overcommit_tracker(&process_list), macros(std::make_unique<Macros>())
     {
         /// TODO: make it singleton (?)
         static std::atomic<size_t> num_calls{0};
@@ -507,7 +516,8 @@ struct ContextSharedPart : boost::noncopyable
         access_control->setExternalAuthenticatorsConfig(*config_value);
     }
 
-    const Poco::Util::AbstractConfiguration & getConfigRefWithLock(const std::lock_guard<ContextSharedMutex> &) const TSA_REQUIRES(this->mutex)
+    const Poco::Util::AbstractConfiguration & getConfigRefWithLock(const std::lock_guard<ContextSharedMutex> &) const
+        TSA_REQUIRES(this->mutex)
     {
         return config ? *config : Poco::Util::Application::instance().config();
     }
@@ -649,10 +659,7 @@ struct ContextSharedPart : boost::noncopyable
         total_memory_tracker.resetOvercommitTracker();
     }
 
-    bool hasTraceCollector() const
-    {
-        return trace_collector.has_value();
-    }
+    bool hasTraceCollector() const { return trace_collector.has_value(); }
 
     void initializeTraceCollector(std::shared_ptr<TraceLog> trace_log)
     {
@@ -718,16 +725,22 @@ ContextData::ContextData() = default;
 ContextData::ContextData(const ContextData &) = default;
 
 Context::Context() = default;
-Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this<Context>(rhs) {}
+Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this<Context>(rhs)
+{
+}
 
 SharedContextHolder::SharedContextHolder(SharedContextHolder &&) noexcept = default;
 SharedContextHolder & SharedContextHolder::operator=(SharedContextHolder &&) noexcept = default;
 SharedContextHolder::SharedContextHolder() = default;
 SharedContextHolder::~SharedContextHolder() = default;
-SharedContextHolder::SharedContextHolder(std::unique_ptr<ContextSharedPart> shared_context)
-    : shared(std::move(shared_context)) {}
+SharedContextHolder::SharedContextHolder(std::unique_ptr<ContextSharedPart> shared_context) : shared(std::move(shared_context))
+{
+}
 
-void SharedContextHolder::reset() { shared.reset(); }
+void SharedContextHolder::reset()
+{
+    shared.reset();
+}
 
 ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part)
 {
@@ -770,21 +783,57 @@ ContextMutablePtr Context::createCopy(const ContextMutablePtr & other)
 
 Context::~Context() = default;
 
-InterserverIOHandler & Context::getInterserverIOHandler() { return shared->interserver_io_handler; }
-const InterserverIOHandler & Context::getInterserverIOHandler() const { return shared->interserver_io_handler; }
+InterserverIOHandler & Context::getInterserverIOHandler()
+{
+    return shared->interserver_io_handler;
+}
+const InterserverIOHandler & Context::getInterserverIOHandler() const
+{
+    return shared->interserver_io_handler;
+}
 
-ProcessList & Context::getProcessList() { return shared->process_list; }
-const ProcessList & Context::getProcessList() const { return shared->process_list; }
-OvercommitTracker * Context::getGlobalOvercommitTracker() const { return &shared->global_overcommit_tracker; }
+ProcessList & Context::getProcessList()
+{
+    return shared->process_list;
+}
+const ProcessList & Context::getProcessList() const
+{
+    return shared->process_list;
+}
+OvercommitTracker * Context::getGlobalOvercommitTracker() const
+{
+    return &shared->global_overcommit_tracker;
+}
 
-SessionTracker & Context::getSessionTracker() { return shared->session_tracker; }
+SessionTracker & Context::getSessionTracker()
+{
+    return shared->session_tracker;
+}
 
-MergeList & Context::getMergeList() { return shared->merge_list; }
-const MergeList & Context::getMergeList() const { return shared->merge_list; }
-MovesList & Context::getMovesList() { return shared->moves_list; }
-const MovesList & Context::getMovesList() const { return shared->moves_list; }
-ReplicatedFetchList & Context::getReplicatedFetchList() { return shared->replicated_fetch_list; }
-const ReplicatedFetchList & Context::getReplicatedFetchList() const { return shared->replicated_fetch_list; }
+MergeList & Context::getMergeList()
+{
+    return shared->merge_list;
+}
+const MergeList & Context::getMergeList() const
+{
+    return shared->merge_list;
+}
+MovesList & Context::getMovesList()
+{
+    return shared->moves_list;
+}
+const MovesList & Context::getMovesList() const
+{
+    return shared->moves_list;
+}
+ReplicatedFetchList & Context::getReplicatedFetchList()
+{
+    return shared->replicated_fetch_list;
+}
+const ReplicatedFetchList & Context::getReplicatedFetchList() const
+{
+    return shared->replicated_fetch_list;
+}
 
 String Context::resolveDatabase(const String & database_name) const
 {
@@ -838,21 +887,19 @@ Strings Context::getWarnings() const
         common_warnings = shared->warnings;
 
         if (CurrentMetrics::get(CurrentMetrics::AttachedTable) > static_cast<DB::Int64>(shared->max_table_num_to_warn))
-            common_warnings.emplace_back(fmt::format("Attached tables is more than {}", shared->max_table_num_to_warn));
+            common_warnings.emplace_back(fmt::format("The number of attached tables is more than {}", shared->max_table_num_to_warn));
 
         if (CurrentMetrics::get(CurrentMetrics::AttachedDatabase) > static_cast<DB::Int64>(shared->max_database_num_to_warn))
-            common_warnings.emplace_back(fmt::format("Attached databases is more than {}", shared->max_table_num_to_warn));
+            common_warnings.emplace_back(fmt::format("The number of attached databases is more than {}", shared->max_database_num_to_warn));
 
         if (CurrentMetrics::get(CurrentMetrics::PartsActive) > static_cast<DB::Int64>(shared->max_part_num_to_warn))
-            common_warnings.emplace_back(fmt::format("Active parts is more than {}", shared->max_part_num_to_warn));
+            common_warnings.emplace_back(fmt::format("The number of active parts is more than {}", shared->max_part_num_to_warn));
     }
     /// Make setting's name ordered
     std::set<String> obsolete_settings;
     for (const auto & setting : settings)
-    {
         if (setting.isValueChanged() && setting.isObsolete())
             obsolete_settings.emplace(setting.getName());
-    }
 
     if (!obsolete_settings.empty())
     {
@@ -868,7 +915,8 @@ Strings Context::getWarnings() const
         }
         res = res + "]" + (single_element ? " is" : " are")
             + " changed. "
-              "Please check 'SELECT * FROM system.settings WHERE changed AND is_obsolete' and read the changelog at https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md";
+              "Please check 'SELECT * FROM system.settings WHERE changed AND is_obsolete' and read the changelog at "
+              "https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md";
         common_warnings.emplace_back(res);
     }
 
@@ -966,9 +1014,12 @@ try
 }
 catch (...)
 {
-    DB::tryLogCurrentException(log, fmt::format(
-        "Caught exception while setup temporary path: {}. "
-        "It is ok to skip this exception as cleaning old temporary files is not necessary", path));
+    DB::tryLogCurrentException(
+        log,
+        fmt::format(
+            "Caught exception while setup temporary path: {}. "
+            "It is ok to skip this exception as cleaning old temporary files is not necessary",
+            path));
 }
 
 static VolumePtr createLocalSingleDiskVolume(const std::string & path, const Poco::Util::AbstractConfiguration & config_)
@@ -992,9 +1043,7 @@ void Context::setTemporaryStoragePath(const String & path, size_t max_size)
     VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, shared->getConfigRefWithLock(lock));
 
     for (const auto & disk : volume->getDisks())
-    {
         setupTmpPath(shared->log, disk->getPath());
-    }
 
     shared->root_temp_data_on_disk = std::make_shared<TemporaryDataOnDiskScope>(volume, max_size);
 }
@@ -1010,13 +1059,15 @@ void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_s
     }
 
     if (tmp_policy->getVolumes().size() != 1)
-        throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
-            "Policy '{}' is used temporary files, such policy should have exactly one volume", policy_name);
+        throw Exception(
+            ErrorCodes::NO_ELEMENTS_IN_CONFIG,
+            "Policy '{}' is used temporary files, such policy should have exactly one volume",
+            policy_name);
 
     VolumePtr volume = tmp_policy->getVolume(0);
 
     if (volume->getDisks().empty())
-         throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No disks volume for temporary files");
+        throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No disks volume for temporary files");
 
     for (const auto & disk : volume->getDisks())
     {
@@ -1029,9 +1080,11 @@ void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_s
         if (dynamic_cast<const DiskLocal *>(disk_ptr.get()) == nullptr)
         {
             const auto * disk_raw_ptr = disk_ptr.get();
-            throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
+            throw Exception(
+                ErrorCodes::NO_ELEMENTS_IN_CONFIG,
                 "Disk '{}' ({}) is not local and can't be used for temporary files",
-                disk_ptr->getName(), typeid(*disk_raw_ptr).name());
+                disk_ptr->getName(),
+                typeid(*disk_raw_ptr).name());
         }
 
         setupTmpPath(shared->log, disk->getPath());
@@ -1156,9 +1209,11 @@ void Context::setUser(const UUID & user_id_, const std::optional<const std::vect
     auto & access_control = getAccessControl();
     auto user = access_control.read<User>(user_id_);
 
-    auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles);
+    auto new_current_roles
+        = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles);
     auto enabled_roles = access_control.getEnabledRolesInfo(new_current_roles, {});
-    auto enabled_profiles = access_control.getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles);
+    auto enabled_profiles = access_control.getEnabledSettingsInfo(
+        user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles);
     const auto & database = user->default_database;
 
     /// Apply user's profiles, constraints, settings, roles.
@@ -1249,18 +1304,55 @@ void Context::checkAccessImpl(const Args &... args) const
     return getAccess()->checkAccess(args...);
 }
 
-void Context::checkAccess(const AccessFlags & flags) const { return checkAccessImpl(flags); }
-void Context::checkAccess(const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(flags, database); }
-void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(flags, database, table); }
-void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(flags, database, table, column); }
-void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const { return checkAccessImpl(flags, database, table, columns); }
-void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(flags, database, table, columns); }
-void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName()); }
-void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, std::string_view column) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), column); }
-void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, const std::vector<std::string_view> & columns) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), columns); }
-void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, const Strings & columns) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), columns); }
-void Context::checkAccess(const AccessRightsElement & element) const { return checkAccessImpl(element); }
-void Context::checkAccess(const AccessRightsElements & elements) const { return checkAccessImpl(elements); }
+void Context::checkAccess(const AccessFlags & flags) const
+{
+    return checkAccessImpl(flags);
+}
+void Context::checkAccess(const AccessFlags & flags, std::string_view database) const
+{
+    return checkAccessImpl(flags, database);
+}
+void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const
+{
+    return checkAccessImpl(flags, database, table);
+}
+void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const
+{
+    return checkAccessImpl(flags, database, table, column);
+}
+void Context::checkAccess(
+    const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const
+{
+    return checkAccessImpl(flags, database, table, columns);
+}
+void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const
+{
+    return checkAccessImpl(flags, database, table, columns);
+}
+void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id) const
+{
+    checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName());
+}
+void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, std::string_view column) const
+{
+    checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), column);
+}
+void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, const std::vector<std::string_view> & columns) const
+{
+    checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), columns);
+}
+void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, const Strings & columns) const
+{
+    checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), columns);
+}
+void Context::checkAccess(const AccessRightsElement & element) const
+{
+    return checkAccessImpl(element);
+}
+void Context::checkAccess(const AccessRightsElements & elements) const
+{
+    return checkAccessImpl(elements);
+}
 
 std::shared_ptr<const ContextAccess> Context::getAccess() const
 {
@@ -1270,7 +1362,8 @@ std::shared_ptr<const ContextAccess> Context::getAccess() const
         /// If setUserID() was never called then this must be the global context with the full access.
         bool full_access = !user_id;
 
-        return ContextAccessParams{user_id, full_access, /* use_default_roles= */ false, current_roles, settings, current_database, client_info};
+        return ContextAccessParams{
+            user_id, full_access, /* use_default_roles= */ false, current_roles, settings, current_database, client_info};
     };
 
     /// Check if the current access rights are still valid, otherwise get parameters for recalculating access rights.
@@ -1325,7 +1418,8 @@ std::optional<QuotaUsage> Context::getQuotaUsage() const
     return getAccess()->getQuotaUsage();
 }
 
-void Context::setCurrentProfileWithLock(const String & profile_name, bool check_constraints, const std::lock_guard<ContextSharedMutex> & lock)
+void Context::setCurrentProfileWithLock(
+    const String & profile_name, bool check_constraints, const std::lock_guard<ContextSharedMutex> & lock)
 {
     try
     {
@@ -1345,7 +1439,8 @@ void Context::setCurrentProfileWithLock(const UUID & profile_id, bool check_cons
     setCurrentProfilesWithLock(*profile_info, check_constraints, lock);
 }
 
-void Context::setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::lock_guard<ContextSharedMutex> & lock)
+void Context::setCurrentProfilesWithLock(
+    const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::lock_guard<ContextSharedMutex> & lock)
 {
     if (check_constraints)
         checkSettingsConstraintsWithLock(profiles_info.settings, SettingSource::PROFILE);
@@ -1386,9 +1481,10 @@ std::vector<UUID> Context::getEnabledProfiles() const
 
 ResourceManagerPtr Context::getResourceManager() const
 {
-    callOnce(shared->resource_manager_initialized, [&] {
-        shared->resource_manager = ResourceManagerFactory::instance().get(getConfigRef().getString("resource_manager", "dynamic"));
-    });
+    callOnce(
+        shared->resource_manager_initialized,
+        [&]
+        { shared->resource_manager = ResourceManagerFactory::instance().get(getConfigRef().getString("resource_manager", "dynamic")); });
 
     return shared->resource_manager;
 }
@@ -1656,17 +1752,18 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
             ASTCreateQuery create;
             create.select = query->as<ASTSelectWithUnionQuery>();
             auto sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(query, getQueryContext());
-            auto res = std::make_shared<StorageView>(StorageID(database_name, table_name),
-                                                     create,
-                                                     ColumnsDescription(sample_block.getNamesAndTypesList()),
-                                                     /* comment */ "",
-                                                     /* is_parameterized_view */ true);
+            auto res = std::make_shared<StorageView>(
+                StorageID(database_name, table_name),
+                create,
+                ColumnsDescription(sample_block.getNamesAndTypesList()),
+                /* comment */ "",
+                /* is_parameterized_view */ true);
             res->startup();
             function->prefer_subquery_to_function_formatting = true;
             return res;
         }
     }
-    auto hash = table_expression->getTreeHash(/*ignore_aliases=*/ true);
+    auto hash = table_expression->getTreeHash(/*ignore_aliases=*/true);
     auto key = toString(hash);
     StoragePtr & res = table_function_results[key];
     if (!res)
@@ -1679,21 +1776,19 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
         catch (Exception & e)
         {
             if (e.code() == ErrorCodes::UNKNOWN_FUNCTION)
-            {
                 e.addMessage(" or incorrect parameterized view");
-            }
             throw;
         }
 
-        uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions;
+        uint64_t use_structure_from_insertion_table_in_table_functions
+            = getSettingsRef().use_structure_from_insertion_table_in_table_functions;
         if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())
         {
-            const auto & insert_columns = DatabaseCatalog::instance()
-                                              .getTable(getInsertionTable(), shared_from_this())
-                                              ->getInMemoryMetadataPtr()
-                                              ->getColumns();
+            const auto & insert_columns
+                = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
 
-            const auto & insert_column_names = hasInsertionTableColumnNames() ? *getInsertionTableColumnNames() : insert_columns.getOrdinary().getNames();
+            const auto & insert_column_names
+                = hasInsertionTableColumnNames() ? *getInsertionTableColumnNames() : insert_columns.getOrdinary().getNames();
             DB::ColumnsDescription structure_hint;
 
             bool use_columns_from_insert_query = true;
@@ -1702,7 +1797,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
             /// insert table columns to table function columns through names from SELECT expression.
 
             auto insert_column_name_it = insert_column_names.begin();
-            auto insert_column_names_end = insert_column_names.end();  /// end iterator of the range covered by possible asterisk
+            auto insert_column_names_end = insert_column_names.end(); /// end iterator of the range covered by possible asterisk
             auto virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
             bool asterisk = false;
             const auto & expression_list = select_query_hint->select()->as<ASTExpressionList>()->children;
@@ -1719,7 +1814,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
                         if (asterisk)
                         {
                             if (use_structure_from_insertion_table_in_table_functions == 1)
-                                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
+                                throw Exception(
+                                    ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
 
                             use_columns_from_insert_query = false;
                             break;
@@ -1752,7 +1848,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
                     if (!structure_hint.empty())
                     {
                         if (use_structure_from_insertion_table_in_table_functions == 1)
-                            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
+                            throw Exception(
+                                ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
 
                         use_columns_from_insert_query = false;
                         break;
@@ -1790,7 +1887,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
             {
                 /// For input function we should check if input format supports reading subset of columns.
                 if (table_function_ptr->getName() == "input")
-                    use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat(), shared_from_this());
+                    use_columns_from_insert_query
+                        = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat(), shared_from_this());
                 else
                     use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(shared_from_this());
             }
@@ -1814,9 +1912,11 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
 
                     if (!structure_hint.empty())
                         table_function_ptr->setStructureHint(structure_hint);
-
-                } else if (use_structure_from_insertion_table_in_table_functions == 1)
-                    throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns in insert table less than required by SELECT expression.");
+                }
+                else if (use_structure_from_insertion_table_in_table_functions == 1)
+                    throw Exception(
+                        ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH,
+                        "Number of columns in insert table less than required by SELECT expression.");
             }
         }
 
@@ -1826,7 +1926,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
         ///
         ///     remote('127.1', system.one) -> remote('127.1', 'system.one'),
         ///
-        auto new_hash = table_expression->getTreeHash(/*ignore_aliases=*/ true);
+        auto new_hash = table_expression->getTreeHash(/*ignore_aliases=*/true);
         if (hash != new_hash)
         {
             key = toString(new_hash);
@@ -1838,14 +1938,12 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
 
 StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr)
 {
-    const auto hash = table_expression->getTreeHash(/*ignore_aliases=*/ true);
+    const auto hash = table_expression->getTreeHash(/*ignore_aliases=*/true);
     const auto key = toString(hash);
     StoragePtr & res = table_function_results[key];
 
     if (!res)
-    {
         res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName());
-    }
 
     return res;
 }
@@ -1854,8 +1952,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
 void Context::addViewSource(const StoragePtr & storage)
 {
     if (view_source)
-        throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary view source storage {} already exists.",
-            backQuoteIfNeed(view_source->getName()));
+        throw Exception(
+            ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary view source storage {} already exists.", backQuoteIfNeed(view_source->getName()));
     view_source = storage;
 }
 
@@ -1916,13 +2014,12 @@ void Context::applySettingChangeWithLock(const SettingChange & change, const std
     catch (Exception & e)
     {
         e.addMessage(fmt::format(
-                         "in attempt to set the value of setting '{}' to {}",
-                         change.name, applyVisitor(FieldVisitorToString(), change.value)));
+            "in attempt to set the value of setting '{}' to {}", change.name, applyVisitor(FieldVisitorToString(), change.value)));
         throw;
     }
 }
 
-void Context::applySettingsChangesWithLock(const SettingsChanges & changes, const std::lock_guard<ContextSharedMutex>& lock)
+void Context::applySettingsChangesWithLock(const SettingsChanges & changes, const std::lock_guard<ContextSharedMutex> & lock)
 {
     for (const SettingChange & change : changes)
         applySettingChangeWithLock(change, lock);
@@ -1950,8 +2047,7 @@ void Context::applySettingChange(const SettingChange & change)
     catch (Exception & e)
     {
         e.addMessage(fmt::format(
-                         "in attempt to set the value of setting '{}' to {}",
-                         change.name, applyVisitor(FieldVisitorToString(), change.value)));
+            "in attempt to set the value of setting '{}' to {}", change.name, applyVisitor(FieldVisitorToString(), change.value)));
         throw;
     }
 }
@@ -1988,7 +2084,8 @@ void Context::clampToSettingsConstraintsWithLock(SettingsChanges & changes, Sett
     getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.clamp(settings, changes, source);
 }
 
-void Context::checkMergeTreeSettingsConstraintsWithLock(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const
+void Context::checkMergeTreeSettingsConstraintsWithLock(
+    const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const
 {
     getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(merge_tree_settings, changes);
 }
@@ -2032,7 +2129,7 @@ void Context::checkMergeTreeSettingsConstraints(const MergeTreeSettings & merge_
 void Context::resetSettingsToDefaultValue(const std::vector<String> & names)
 {
     std::lock_guard lock(mutex);
-    for (const String & name: names)
+    for (const String & name : names)
         settings.setDefaultValue(name);
 }
 
@@ -2066,9 +2163,10 @@ String Context::getInitialQueryId() const
 void Context::setCurrentDatabaseNameInGlobalContext(const String & name)
 {
     if (!isGlobalContext())
-        throw Exception(ErrorCodes::LOGICAL_ERROR,
-                        "Cannot set current database for non global context, this method should "
-                        "be used during server initialization");
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Cannot set current database for non global context, this method should "
+            "be used during server initialization");
     std::lock_guard lock(mutex);
 
     if (!current_database.empty())
@@ -2112,13 +2210,12 @@ void Context::setCurrentQueryId(const String & query_id)
 
 
     String query_id_to_set = query_id;
-    if (query_id_to_set.empty())    /// If the user did not submit his query_id, then we generate it ourselves.
+    if (query_id_to_set.empty()) /// If the user did not submit his query_id, then we generate it ourselves.
     {
         /// Use protected constructor.
         struct QueryUUID : Poco::UUID
         {
-            QueryUUID(const char * bytes, Poco::UUID::Version version)
-                : Poco::UUID(bytes, version) {}
+            QueryUUID(const char * bytes, Poco::UUID::Version version) : Poco::UUID(bytes, version) { }
         };
 
         query_id_to_set = QueryUUID(random.bytes, Poco::UUID::UUID_RANDOM).toString();
@@ -2184,7 +2281,8 @@ void Context::setMacros(std::unique_ptr<Macros> && macros)
 ContextMutablePtr Context::getQueryContext() const
 {
     auto ptr = query_context.lock();
-    if (!ptr) throw Exception(ErrorCodes::THERE_IS_NO_QUERY, "There is no query or query context has expired");
+    if (!ptr)
+        throw Exception(ErrorCodes::THERE_IS_NO_QUERY, "There is no query or query context has expired");
     return ptr;
 }
 
@@ -2197,20 +2295,23 @@ bool Context::isInternalSubquery() const
 ContextMutablePtr Context::getSessionContext() const
 {
     auto ptr = session_context.lock();
-    if (!ptr) throw Exception(ErrorCodes::THERE_IS_NO_SESSION, "There is no session or session context has expired");
+    if (!ptr)
+        throw Exception(ErrorCodes::THERE_IS_NO_SESSION, "There is no session or session context has expired");
     return ptr;
 }
 
 ContextMutablePtr Context::getGlobalContext() const
 {
     auto ptr = global_context.lock();
-    if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired");
+    if (!ptr)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired");
     return ptr;
 }
 
 ContextMutablePtr Context::getBufferContext() const
 {
-    if (!buffer_context) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no buffer context");
+    if (!buffer_context)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no buffer context");
     return buffer_context;
 }
 
@@ -2269,11 +2370,11 @@ ExternalDictionariesLoader & Context::getExternalDictionariesLoader()
     return getExternalDictionariesLoaderWithLock(lock);
 }
 
-ExternalDictionariesLoader & Context::getExternalDictionariesLoaderWithLock(const std::lock_guard<std::mutex> &) TSA_REQUIRES(shared->external_dictionaries_mutex)
+ExternalDictionariesLoader & Context::getExternalDictionariesLoaderWithLock(const std::lock_guard<std::mutex> &)
+    TSA_REQUIRES(shared->external_dictionaries_mutex)
 {
     if (!shared->external_dictionaries_loader)
-        shared->external_dictionaries_loader =
-            std::make_unique<ExternalDictionariesLoader>(getGlobalContext());
+        shared->external_dictionaries_loader = std::make_unique<ExternalDictionariesLoader>(getGlobalContext());
     return *shared->external_dictionaries_loader;
 }
 
@@ -2289,11 +2390,12 @@ ExternalUserDefinedExecutableFunctionsLoader & Context::getExternalUserDefinedEx
 }
 
 ExternalUserDefinedExecutableFunctionsLoader &
-Context::getExternalUserDefinedExecutableFunctionsLoaderWithLock(const std::lock_guard<std::mutex> &) TSA_REQUIRES(shared->external_user_defined_executable_functions_mutex)
+Context::getExternalUserDefinedExecutableFunctionsLoaderWithLock(const std::lock_guard<std::mutex> &)
+    TSA_REQUIRES(shared->external_user_defined_executable_functions_mutex)
 {
     if (!shared->external_user_defined_executable_functions_loader)
-        shared->external_user_defined_executable_functions_loader =
-            std::make_unique<ExternalUserDefinedExecutableFunctionsLoader>(getGlobalContext());
+        shared->external_user_defined_executable_functions_loader
+            = std::make_unique<ExternalUserDefinedExecutableFunctionsLoader>(getGlobalContext());
     return *shared->external_user_defined_executable_functions_loader;
 }
 
@@ -2305,10 +2407,8 @@ EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool throw_on_
     {
         auto geo_dictionaries_loader = std::make_unique<GeoDictionariesLoader>();
 
-        shared->embedded_dictionaries = std::make_unique<EmbeddedDictionaries>(
-            std::move(geo_dictionaries_loader),
-            getGlobalContext(),
-            throw_on_error);
+        shared->embedded_dictionaries
+            = std::make_unique<EmbeddedDictionaries>(std::move(geo_dictionaries_loader), getGlobalContext(), throw_on_error);
     }
 
     return *shared->embedded_dictionaries;
@@ -2377,7 +2477,8 @@ void Context::loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::Abstr
     if (shared->user_defined_executable_functions_config_repository)
     {
         shared->user_defined_executable_functions_config_repository->updatePatterns(patterns);
-        external_user_defined_executable_functions_loader.reloadConfig(shared->user_defined_executable_functions_config_repository->getName());
+        external_user_defined_executable_functions_loader.reloadConfig(
+            shared->user_defined_executable_functions_config_repository->getName());
         return;
     }
 
@@ -2385,14 +2486,15 @@ void Context::loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::Abstr
     auto config_path = getConfigRef().getString("config-file", "config.xml");
     auto repository = std::make_unique<ExternalLoaderXMLConfigRepository>(app_path, config_path, patterns);
     shared->user_defined_executable_functions_config_repository = repository.get();
-    shared->user_defined_executable_functions_xmls = external_user_defined_executable_functions_loader.addConfigRepository(std::move(repository));
+    shared->user_defined_executable_functions_xmls
+        = external_user_defined_executable_functions_loader.addConfigRepository(std::move(repository));
 }
 
 const IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() const
 {
-    callOnce(shared->user_defined_sql_objects_loader_initialized, [&] {
-        shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext());
-    });
+    callOnce(
+        shared->user_defined_sql_objects_loader_initialized,
+        [&] { shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); });
 
     SharedLockGuard lock(shared->mutex);
     return *shared->user_defined_sql_objects_loader;
@@ -2400,9 +2502,9 @@ const IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() c
 
 IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader()
 {
-    callOnce(shared->user_defined_sql_objects_loader_initialized, [&] {
-        shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext());
-    });
+    callOnce(
+        shared->user_defined_sql_objects_loader_initialized,
+        [&] { shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); });
 
     SharedLockGuard lock(shared->mutex);
     return *shared->user_defined_sql_objects_loader;
@@ -2412,18 +2514,14 @@ IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader()
 
 SynonymsExtensions & Context::getSynonymsExtensions() const
 {
-    callOnce(shared->synonyms_extensions_initialized, [&] {
-        shared->synonyms_extensions.emplace(getConfigRef());
-    });
+    callOnce(shared->synonyms_extensions_initialized, [&] { shared->synonyms_extensions.emplace(getConfigRef()); });
 
     return *shared->synonyms_extensions;
 }
 
 Lemmatizers & Context::getLemmatizers() const
 {
-    callOnce(shared->lemmatizers_initialized, [&] {
-        shared->lemmatizers.emplace(getConfigRef());
-    });
+    callOnce(shared->lemmatizers_initialized, [&] { shared->lemmatizers.emplace(getConfigRef()); });
 
     return *shared->lemmatizers;
 }
@@ -2431,17 +2529,21 @@ Lemmatizers & Context::getLemmatizers() const
 
 BackupsWorker & Context::getBackupsWorker() const
 {
-    callOnce(shared->backups_worker_initialized, [&] {
-        const auto & config = getConfigRef();
-        const bool allow_concurrent_backups = config.getBool("backups.allow_concurrent_backups", true);
-        const bool allow_concurrent_restores = config.getBool("backups.allow_concurrent_restores", true);
+    callOnce(
+        shared->backups_worker_initialized,
+        [&]
+        {
+            const auto & config = getConfigRef();
+            const bool allow_concurrent_backups = config.getBool("backups.allow_concurrent_backups", true);
+            const bool allow_concurrent_restores = config.getBool("backups.allow_concurrent_restores", true);
 
-        const auto & settings_ref = getSettingsRef();
-        UInt64 backup_threads = config.getUInt64("backup_threads", settings_ref.backup_threads);
-        UInt64 restore_threads = config.getUInt64("restore_threads", settings_ref.restore_threads);
+            const auto & settings_ref = getSettingsRef();
+            UInt64 backup_threads = config.getUInt64("backup_threads", settings_ref.backup_threads);
+            UInt64 restore_threads = config.getUInt64("restore_threads", settings_ref.restore_threads);
 
-        shared->backups_worker.emplace(getGlobalContext(), backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores);
-    });
+            shared->backups_worker.emplace(
+                getGlobalContext(), backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores);
+        });
 
     return *shared->backups_worker;
 }
@@ -2556,13 +2658,21 @@ void Context::clearMarkCache() const
 
 ThreadPool & Context::getLoadMarksThreadpool() const
 {
-    callOnce(shared->load_marks_threadpool_initialized, [&] {
-        const auto & config = getConfigRef();
-        auto pool_size = config.getUInt(".load_marks_threadpool_pool_size", 50);
-        auto queue_size = config.getUInt(".load_marks_threadpool_queue_size", 1000000);
-        shared->load_marks_threadpool = std::make_unique<ThreadPool>(
-            CurrentMetrics::MarksLoaderThreads, CurrentMetrics::MarksLoaderThreadsActive, CurrentMetrics::MarksLoaderThreadsScheduled, pool_size, pool_size, queue_size);
-    });
+    callOnce(
+        shared->load_marks_threadpool_initialized,
+        [&]
+        {
+            const auto & config = getConfigRef();
+            auto pool_size = config.getUInt(".load_marks_threadpool_pool_size", 50);
+            auto queue_size = config.getUInt(".load_marks_threadpool_queue_size", 1000000);
+            shared->load_marks_threadpool = std::make_unique<ThreadPool>(
+                CurrentMetrics::MarksLoaderThreads,
+                CurrentMetrics::MarksLoaderThreadsActive,
+                CurrentMetrics::MarksLoaderThreadsScheduled,
+                pool_size,
+                pool_size,
+                queue_size);
+        });
 
     return *shared->load_marks_threadpool;
 }
@@ -2739,13 +2849,21 @@ void Context::clearCaches() const
 
 ThreadPool & Context::getPrefetchThreadpool() const
 {
-    callOnce(shared->prefetch_threadpool_initialized, [&] {
-        const auto & config = getConfigRef();
-        auto pool_size = config.getUInt(".prefetch_threadpool_pool_size", 100);
-        auto queue_size = config.getUInt(".prefetch_threadpool_queue_size", 1000000);
-        shared->prefetch_threadpool = std::make_unique<ThreadPool>(
-            CurrentMetrics::IOPrefetchThreads, CurrentMetrics::IOPrefetchThreadsActive, CurrentMetrics::IOPrefetchThreadsScheduled, pool_size, pool_size, queue_size);
-    });
+    callOnce(
+        shared->prefetch_threadpool_initialized,
+        [&]
+        {
+            const auto & config = getConfigRef();
+            auto pool_size = config.getUInt(".prefetch_threadpool_pool_size", 100);
+            auto queue_size = config.getUInt(".prefetch_threadpool_queue_size", 1000000);
+            shared->prefetch_threadpool = std::make_unique<ThreadPool>(
+                CurrentMetrics::IOPrefetchThreads,
+                CurrentMetrics::IOPrefetchThreadsActive,
+                CurrentMetrics::IOPrefetchThreadsScheduled,
+                pool_size,
+                pool_size,
+                queue_size);
+        });
 
     return *shared->prefetch_threadpool;
 }
@@ -2758,13 +2876,16 @@ size_t Context::getPrefetchThreadpoolSize() const
 
 BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const
 {
-    callOnce(shared->buffer_flush_schedule_pool_initialized, [&] {
-        shared->buffer_flush_schedule_pool = std::make_unique<BackgroundSchedulePool>(
-            shared->server_settings.background_buffer_flush_schedule_pool_size,
-            CurrentMetrics::BackgroundBufferFlushSchedulePoolTask,
-            CurrentMetrics::BackgroundBufferFlushSchedulePoolSize,
-            "BgBufSchPool");
-    });
+    callOnce(
+        shared->buffer_flush_schedule_pool_initialized,
+        [&]
+        {
+            shared->buffer_flush_schedule_pool = std::make_unique<BackgroundSchedulePool>(
+                shared->server_settings.background_buffer_flush_schedule_pool_size,
+                CurrentMetrics::BackgroundBufferFlushSchedulePoolTask,
+                CurrentMetrics::BackgroundBufferFlushSchedulePoolSize,
+                "BgBufSchPool");
+        });
 
     return *shared->buffer_flush_schedule_pool;
 }
@@ -2776,11 +2897,16 @@ BackgroundTaskSchedulingSettings Context::getBackgroundProcessingTaskSchedulingS
     const auto & config = getConfigRef();
     task_settings.thread_sleep_seconds = config.getDouble("background_processing_pool_thread_sleep_seconds", 10);
     task_settings.thread_sleep_seconds_random_part = config.getDouble("background_processing_pool_thread_sleep_seconds_random_part", 1.0);
-    task_settings.thread_sleep_seconds_if_nothing_to_do = config.getDouble("background_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1);
-    task_settings.task_sleep_seconds_when_no_work_min = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_min", 10);
-    task_settings.task_sleep_seconds_when_no_work_max = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_max", 600);
-    task_settings.task_sleep_seconds_when_no_work_multiplier = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1);
-    task_settings.task_sleep_seconds_when_no_work_random_part = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0);
+    task_settings.thread_sleep_seconds_if_nothing_to_do
+        = config.getDouble("background_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1);
+    task_settings.task_sleep_seconds_when_no_work_min
+        = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_min", 10);
+    task_settings.task_sleep_seconds_when_no_work_max
+        = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_max", 600);
+    task_settings.task_sleep_seconds_when_no_work_multiplier
+        = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1);
+    task_settings.task_sleep_seconds_when_no_work_random_part
+        = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0);
     return task_settings;
 }
 
@@ -2790,51 +2916,66 @@ BackgroundTaskSchedulingSettings Context::getBackgroundMoveTaskSchedulingSetting
 
     const auto & config = getConfigRef();
     task_settings.thread_sleep_seconds = config.getDouble("background_move_processing_pool_thread_sleep_seconds", 10);
-    task_settings.thread_sleep_seconds_random_part = config.getDouble("background_move_processing_pool_thread_sleep_seconds_random_part", 1.0);
-    task_settings.thread_sleep_seconds_if_nothing_to_do = config.getDouble("background_move_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1);
-    task_settings.task_sleep_seconds_when_no_work_min = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_min", 10);
-    task_settings.task_sleep_seconds_when_no_work_max = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_max", 600);
-    task_settings.task_sleep_seconds_when_no_work_multiplier = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1);
-    task_settings.task_sleep_seconds_when_no_work_random_part = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0);
+    task_settings.thread_sleep_seconds_random_part
+        = config.getDouble("background_move_processing_pool_thread_sleep_seconds_random_part", 1.0);
+    task_settings.thread_sleep_seconds_if_nothing_to_do
+        = config.getDouble("background_move_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1);
+    task_settings.task_sleep_seconds_when_no_work_min
+        = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_min", 10);
+    task_settings.task_sleep_seconds_when_no_work_max
+        = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_max", 600);
+    task_settings.task_sleep_seconds_when_no_work_multiplier
+        = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1);
+    task_settings.task_sleep_seconds_when_no_work_random_part
+        = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0);
 
     return task_settings;
 }
 
 BackgroundSchedulePool & Context::getSchedulePool() const
 {
-    callOnce(shared->schedule_pool_initialized, [&] {
-        shared->schedule_pool = std::make_unique<BackgroundSchedulePool>(
-            shared->server_settings.background_schedule_pool_size,
-            CurrentMetrics::BackgroundSchedulePoolTask,
-            CurrentMetrics::BackgroundSchedulePoolSize,
-            "BgSchPool");
-    });
+    callOnce(
+        shared->schedule_pool_initialized,
+        [&]
+        {
+            shared->schedule_pool = std::make_unique<BackgroundSchedulePool>(
+                shared->server_settings.background_schedule_pool_size,
+                CurrentMetrics::BackgroundSchedulePoolTask,
+                CurrentMetrics::BackgroundSchedulePoolSize,
+                "BgSchPool");
+        });
 
     return *shared->schedule_pool;
 }
 
 BackgroundSchedulePool & Context::getDistributedSchedulePool() const
 {
-    callOnce(shared->distributed_schedule_pool_initialized, [&] {
-        shared->distributed_schedule_pool = std::make_unique<BackgroundSchedulePool>(
-            shared->server_settings.background_distributed_schedule_pool_size,
-            CurrentMetrics::BackgroundDistributedSchedulePoolTask,
-            CurrentMetrics::BackgroundDistributedSchedulePoolSize,
-            "BgDistSchPool");
-    });
+    callOnce(
+        shared->distributed_schedule_pool_initialized,
+        [&]
+        {
+            shared->distributed_schedule_pool = std::make_unique<BackgroundSchedulePool>(
+                shared->server_settings.background_distributed_schedule_pool_size,
+                CurrentMetrics::BackgroundDistributedSchedulePoolTask,
+                CurrentMetrics::BackgroundDistributedSchedulePoolSize,
+                "BgDistSchPool");
+        });
 
     return *shared->distributed_schedule_pool;
 }
 
 BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const
 {
-    callOnce(shared->message_broker_schedule_pool_initialized, [&] {
-        shared->message_broker_schedule_pool = std::make_unique<BackgroundSchedulePool>(
-            shared->server_settings.background_message_broker_schedule_pool_size,
-            CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask,
-            CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize,
-            "BgMBSchPool");
-    });
+    callOnce(
+        shared->message_broker_schedule_pool_initialized,
+        [&]
+        {
+            shared->message_broker_schedule_pool = std::make_unique<BackgroundSchedulePool>(
+                shared->server_settings.background_message_broker_schedule_pool_size,
+                CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask,
+                CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize,
+                "BgMBSchPool");
+        });
 
     return *shared->message_broker_schedule_pool;
 }
@@ -3013,7 +3154,10 @@ bool Context::tryCheckClientConnectionToMyKeeperCluster() const
             {
                 if (checkZooKeeperConfigIsLocal(getConfigRef(), "auxiliary_zookeepers." + aux_zk_name))
                 {
-                    LOG_DEBUG(shared->log, "Our Keeper server is participant of the auxiliary zookeeper cluster ({}), will try to connect to it", aux_zk_name);
+                    LOG_DEBUG(
+                        shared->log,
+                        "Our Keeper server is participant of the auxiliary zookeeper cluster ({}), will try to connect to it",
+                        aux_zk_name);
                     getAuxiliaryZooKeeper(aux_zk_name);
                     /// Connected, return true
                     return true;
@@ -3086,13 +3230,17 @@ void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) cons
         if (start_async)
         {
             assert(!is_standalone_app);
-            LOG_INFO(shared->log, "Connected to ZooKeeper (or Keeper) before internal Keeper start or we don't depend on our Keeper cluster, "
-                     "will wait for Keeper asynchronously");
+            LOG_INFO(
+                shared->log,
+                "Connected to ZooKeeper (or Keeper) before internal Keeper start or we don't depend on our Keeper cluster, "
+                "will wait for Keeper asynchronously");
         }
         else
         {
-            LOG_INFO(shared->log, "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start, "
-                     "will wait for Keeper synchronously");
+            LOG_INFO(
+                shared->log,
+                "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start, "
+                "will wait for Keeper synchronously");
         }
 
         shared->keeper_dispatcher = std::make_shared<KeeperDispatcher>();
@@ -3161,8 +3309,9 @@ zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const
                 "config.xml",
                 name);
 
-        zookeeper = shared->auxiliary_zookeepers.emplace(name,
-                        std::make_shared<zkutil::ZooKeeper>(config, "auxiliary_zookeepers." + name, getZooKeeperLog())).first;
+        zookeeper = shared->auxiliary_zookeepers
+                        .emplace(name, std::make_shared<zkutil::ZooKeeper>(config, "auxiliary_zookeepers." + name, getZooKeeperLog()))
+                        .first;
     }
     else if (zookeeper->second->expired())
         zookeeper->second = zookeeper->second->startNewSession();
@@ -3269,11 +3418,12 @@ void Context::setInterserverIOAddress(const String & host, UInt16 port)
 std::pair<String, UInt16> Context::getInterserverIOAddress() const
 {
     if (shared->interserver_io_host.empty() || shared->interserver_io_port == 0)
-        throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
-                        "Parameter 'interserver_http(s)_port' required for replication is not specified "
-                        "in configuration file.");
+        throw Exception(
+            ErrorCodes::NO_ELEMENTS_IN_CONFIG,
+            "Parameter 'interserver_http(s)_port' required for replication is not specified "
+            "in configuration file.");
 
-    return { shared->interserver_io_host, shared->interserver_io_port };
+    return {shared->interserver_io_host, shared->interserver_io_port};
 }
 
 void Context::setInterserverScheme(const String & scheme)
@@ -3340,20 +3490,20 @@ UInt16 Context::getServerPort(const String & port_name) const
 
 void Context::setMaxPartNumToWarn(size_t max_part_to_warn)
 {
-   SharedLockGuard lock(shared->mutex);
-   shared->max_part_num_to_warn = max_part_to_warn;
+    SharedLockGuard lock(shared->mutex);
+    shared->max_part_num_to_warn = max_part_to_warn;
 }
 
 void Context::setMaxTableNumToWarn(size_t max_table_to_warn)
 {
-   SharedLockGuard lock(shared->mutex);
-   shared->max_table_num_to_warn= max_table_to_warn;
+    SharedLockGuard lock(shared->mutex);
+    shared->max_table_num_to_warn = max_table_to_warn;
 }
 
 void Context::setMaxDatabaseNumToWarn(size_t max_database_to_warn)
 {
-   SharedLockGuard lock(shared->mutex);
-   shared->max_database_num_to_warn= max_database_to_warn;
+    SharedLockGuard lock(shared->mutex);
+    shared->max_database_num_to_warn = max_database_to_warn;
 }
 
 std::shared_ptr<Cluster> Context::getCluster(const std::string & cluster_name) const
@@ -3449,9 +3599,7 @@ void Context::setClustersConfig(const ConfigurationPtr & config, bool enable_dis
 {
     std::lock_guard lock(shared->clusters_mutex);
     if (ConfigHelper::getBool(*config, "allow_experimental_cluster_discovery") && enable_discovery && !shared->cluster_discovery)
-    {
         shared->cluster_discovery = std::make_unique<ClusterDiscovery>(*config, getGlobalContext());
-    }
 
     /// Do not update clusters if this part of config wasn't changed.
     if (shared->clusters && isSameConfiguration(*config, *shared->clusters_config, config_name))
@@ -3484,11 +3632,14 @@ void Context::initializeSystemLogs()
     /// triggered from another thread, that is launched while initializing the system logs,
     /// for example, system.filesystem_cache_log will be triggered by parts loading
     /// of any other table if it is stored on a disk with cache.
-    callOnce(shared->system_logs_initialized, [&] {
-        auto system_logs = std::make_unique<SystemLogs>(getGlobalContext(), getConfigRef());
-        std::lock_guard lock(shared->mutex);
-        shared->system_logs = std::move(system_logs);
-    });
+    callOnce(
+        shared->system_logs_initialized,
+        [&]
+        {
+            auto system_logs = std::make_unique<SystemLogs>(getGlobalContext(), getConfigRef());
+            std::lock_guard lock(shared->mutex);
+            shared->system_logs = std::move(system_logs);
+        });
 }
 
 void Context::initializeTraceCollector()
@@ -3829,7 +3980,8 @@ DiskSelectorPtr Context::getDiskSelector(std::lock_guard<std::mutex> & /* lock *
     return shared->merge_tree_disk_selector;
 }
 
-StoragePolicySelectorPtr Context::getStoragePolicySelector(std::lock_guard<std::mutex> & lock) const TSA_REQUIRES(shared->storage_policies_mutex)
+StoragePolicySelectorPtr Context::getStoragePolicySelector(std::lock_guard<std::mutex> & lock) const
+    TSA_REQUIRES(shared->storage_policies_mutex)
 {
     if (!shared->merge_tree_storage_policy_selector)
     {
@@ -3861,7 +4013,9 @@ void Context::updateStorageConfiguration(const Poco::Util::AbstractConfiguration
             catch (Exception & e)
             {
                 LOG_ERROR(
-                    shared->log, "An error has occurred while reloading storage policies, storage policies were not applied: {}", e.message());
+                    shared->log,
+                    "An error has occurred while reloading storage policies, storage policies were not applied: {}",
+                    e.message());
             }
         }
 
@@ -3877,7 +4031,6 @@ void Context::updateStorageConfiguration(const Poco::Util::AbstractConfiguration
         if (shared->storage_s3_settings)
             shared->storage_s3_settings->loadFromConfig("s3", config, getSettingsRef());
     }
-
 }
 
 
@@ -3949,19 +4102,24 @@ void Context::checkCanBeDropped(const String & database, const String & table, c
 
     String size_str = formatReadableSizeWithDecimalSuffix(size);
     String max_size_to_drop_str = formatReadableSizeWithDecimalSuffix(max_size_to_drop);
-    throw Exception(ErrorCodes::TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT,
-                    "Table or Partition in {}.{} was not dropped.\nReason:\n"
-                    "1. Size ({}) is greater than max_[table/partition]_size_to_drop ({})\n"
-                    "2. File '{}' intended to force DROP {}\n"
-                    "How to fix this:\n"
-                    "1. Either increase (or set to zero) max_[table/partition]_size_to_drop in server config\n"
-                    "2. Either create forcing file {} and make sure that ClickHouse has write permission for it.\n"
-                    "Example:\nsudo touch '{}' && sudo chmod 666 '{}'",
-                    backQuoteIfNeed(database), backQuoteIfNeed(table),
-                    size_str, max_size_to_drop_str,
-                    force_file.string(), force_file_exists ? "exists but not writeable (could not be removed)" : "doesn't exist",
-                    force_file.string(),
-                    force_file.string(), force_file.string());
+    throw Exception(
+        ErrorCodes::TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT,
+        "Table or Partition in {}.{} was not dropped.\nReason:\n"
+        "1. Size ({}) is greater than max_[table/partition]_size_to_drop ({})\n"
+        "2. File '{}' intended to force DROP {}\n"
+        "How to fix this:\n"
+        "1. Either increase (or set to zero) max_[table/partition]_size_to_drop in server config\n"
+        "2. Either create forcing file {} and make sure that ClickHouse has write permission for it.\n"
+        "Example:\nsudo touch '{}' && sudo chmod 666 '{}'",
+        backQuoteIfNeed(database),
+        backQuoteIfNeed(table),
+        size_str,
+        max_size_to_drop_str,
+        force_file.string(),
+        force_file_exists ? "exists but not writeable (could not be removed)" : "doesn't exist",
+        force_file.string(),
+        force_file.string(),
+        force_file.string());
 }
 
 
@@ -4003,7 +4161,13 @@ void Context::checkPartitionCanBeDropped(const String & database, const String &
 }
 
 
-InputFormatPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, const std::optional<FormatSettings> & format_settings, const std::optional<size_t> max_parsing_threads) const
+InputFormatPtr Context::getInputFormat(
+    const String & name,
+    ReadBuffer & buf,
+    const Block & sample,
+    UInt64 max_block_size,
+    const std::optional<FormatSettings> & format_settings,
+    const std::optional<size_t> max_parsing_threads) const
 {
     return FormatFactory::instance().getInput(name, buf, sample, shared_from_this(), max_block_size, format_settings, max_parsing_threads);
 }
@@ -4202,9 +4366,9 @@ const IHostContextPtr & Context::getHostContext() const
 
 std::shared_ptr<ActionLocksManager> Context::getActionLocksManager() const
 {
-    callOnce(shared->action_locks_manager_initialized, [&] {
-        shared->action_locks_manager = std::make_shared<ActionLocksManager>(shared_from_this());
-    });
+    callOnce(
+        shared->action_locks_manager_initialized,
+        [&] { shared->action_locks_manager = std::make_shared<ActionLocksManager>(shared_from_this()); });
 
     return shared->action_locks_manager;
 }
@@ -4291,7 +4455,8 @@ void Context::setClientInterface(ClientInfo::Interface interface)
     need_recalculate_access = true;
 }
 
-void Context::setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
+void Context::setClientVersion(
+    UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
 {
     client_info.client_version_major = client_version_major;
     client_info.client_version_minor = client_version_minor;
@@ -4376,7 +4541,8 @@ void Context::setQuotaClientKey(const String & quota_key_)
     need_recalculate_access = true;
 }
 
-void Context::setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
+void Context::setConnectionClientVersion(
+    UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
 {
     client_info.connection_client_version_major = client_version_major;
     client_info.connection_client_version_minor = client_version_minor;
@@ -4457,10 +4623,12 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w
     if (!storage_id.database_name.empty())
     {
         if (in_specified_database)
-            return storage_id;     /// NOTE There is no guarantees that table actually exists in database.
+            return storage_id; /// NOTE There is no guarantees that table actually exists in database.
         if (exception)
-            exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "External and temporary tables have no database, but {} is specified",
-                               storage_id.database_name));
+            exception->emplace(Exception(
+                ErrorCodes::UNKNOWN_TABLE,
+                "External and temporary tables have no database, but {} is specified",
+                storage_id.database_name));
         return StorageID::createEmpty();
     }
 
@@ -4545,9 +4713,11 @@ void Context::checkTransactionsAreAllowed(bool explicit_tcl_query /* = false */)
     if (explicit_tcl_query)
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Transactions are not supported");
 
-    throw Exception(ErrorCodes::LOGICAL_ERROR, "Experimental support for transactions is disabled, "
-                    "however, some query or background task tried to access TransactionLog. "
-                    "If you have not enabled this feature explicitly, then it's a bug.");
+    throw Exception(
+        ErrorCodes::LOGICAL_ERROR,
+        "Experimental support for transactions is disabled, "
+        "however, some query or background task tried to access TransactionLog. "
+        "If you have not enabled this feature explicitly, then it's a bug.");
 }
 
 void Context::initCurrentTransaction(MergeTreeTransactionPtr txn)
@@ -4708,47 +4878,55 @@ void Context::initializeBackgroundExecutorsIfNeeded()
     size_t background_common_pool_size = server_settings.background_common_pool_size;
 
     /// With this executor we can execute more tasks than threads we have
-    shared->merge_mutate_executor = std::make_shared<MergeMutateBackgroundExecutor>
-    (
+    shared->merge_mutate_executor = std::make_shared<MergeMutateBackgroundExecutor>(
         "MergeMutate",
-        /*max_threads_count*/background_pool_size,
-        /*max_tasks_count*/background_pool_max_tasks_count,
+        /*max_threads_count*/ background_pool_size,
+        /*max_tasks_count*/ background_pool_max_tasks_count,
         CurrentMetrics::BackgroundMergesAndMutationsPoolTask,
         CurrentMetrics::BackgroundMergesAndMutationsPoolSize,
-        background_merges_mutations_scheduling_policy
-    );
-    LOG_INFO(shared->log, "Initialized background executor for merges and mutations with num_threads={}, num_tasks={}, scheduling_policy={}",
-        background_pool_size, background_pool_max_tasks_count, background_merges_mutations_scheduling_policy);
+        background_merges_mutations_scheduling_policy);
+    LOG_INFO(
+        shared->log,
+        "Initialized background executor for merges and mutations with num_threads={}, num_tasks={}, scheduling_policy={}",
+        background_pool_size,
+        background_pool_max_tasks_count,
+        background_merges_mutations_scheduling_policy);
 
-    shared->moves_executor = std::make_shared<OrdinaryBackgroundExecutor>
-    (
+    shared->moves_executor = std::make_shared<OrdinaryBackgroundExecutor>(
         "Move",
         background_move_pool_size,
         background_move_pool_size,
         CurrentMetrics::BackgroundMovePoolTask,
-        CurrentMetrics::BackgroundMovePoolSize
-    );
-    LOG_INFO(shared->log, "Initialized background executor for move operations with num_threads={}, num_tasks={}", background_move_pool_size, background_move_pool_size);
+        CurrentMetrics::BackgroundMovePoolSize);
+    LOG_INFO(
+        shared->log,
+        "Initialized background executor for move operations with num_threads={}, num_tasks={}",
+        background_move_pool_size,
+        background_move_pool_size);
 
-    shared->fetch_executor = std::make_shared<OrdinaryBackgroundExecutor>
-    (
+    shared->fetch_executor = std::make_shared<OrdinaryBackgroundExecutor>(
         "Fetch",
         background_fetches_pool_size,
         background_fetches_pool_size,
         CurrentMetrics::BackgroundFetchesPoolTask,
-        CurrentMetrics::BackgroundFetchesPoolSize
-    );
-    LOG_INFO(shared->log, "Initialized background executor for fetches with num_threads={}, num_tasks={}", background_fetches_pool_size, background_fetches_pool_size);
+        CurrentMetrics::BackgroundFetchesPoolSize);
+    LOG_INFO(
+        shared->log,
+        "Initialized background executor for fetches with num_threads={}, num_tasks={}",
+        background_fetches_pool_size,
+        background_fetches_pool_size);
 
-    shared->common_executor = std::make_shared<OrdinaryBackgroundExecutor>
-    (
+    shared->common_executor = std::make_shared<OrdinaryBackgroundExecutor>(
         "Common",
         background_common_pool_size,
         background_common_pool_size,
         CurrentMetrics::BackgroundCommonPoolTask,
-        CurrentMetrics::BackgroundCommonPoolSize
-    );
-    LOG_INFO(shared->log, "Initialized background executor for common operations (e.g. clearing old parts) with num_threads={}, num_tasks={}", background_common_pool_size, background_common_pool_size);
+        CurrentMetrics::BackgroundCommonPoolSize);
+    LOG_INFO(
+        shared->log,
+        "Initialized background executor for common operations (e.g. clearing old parts) with num_threads={}, num_tasks={}",
+        background_common_pool_size,
+        background_common_pool_size);
 
     shared->are_background_executors_initialized = true;
 }
@@ -4785,12 +4963,15 @@ OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const
 
 IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const
 {
-    callOnce(shared->readers_initialized, [&] {
-        const auto & config = getConfigRef();
-        shared->asynchronous_remote_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER, config);
-        shared->asynchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER, config);
-        shared->synchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER, config);
-    });
+    callOnce(
+        shared->readers_initialized,
+        [&]
+        {
+            const auto & config = getConfigRef();
+            shared->asynchronous_remote_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER, config);
+            shared->asynchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER, config);
+            shared->synchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER, config);
+        });
 
     switch (type)
     {
@@ -4805,14 +4986,22 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co
 
 ThreadPool & Context::getThreadPoolWriter() const
 {
-    callOnce(shared->threadpool_writer_initialized, [&] {
-        const auto & config = getConfigRef();
-        auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100);
-        auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000);
+    callOnce(
+        shared->threadpool_writer_initialized,
+        [&]
+        {
+            const auto & config = getConfigRef();
+            auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100);
+            auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000);
 
-        shared->threadpool_writer = std::make_unique<ThreadPool>(
-            CurrentMetrics::IOWriterThreads, CurrentMetrics::IOWriterThreadsActive, CurrentMetrics::IOWriterThreadsScheduled, pool_size, pool_size, queue_size);
-    });
+            shared->threadpool_writer = std::make_unique<ThreadPool>(
+                CurrentMetrics::IOWriterThreads,
+                CurrentMetrics::IOWriterThreadsActive,
+                CurrentMetrics::IOWriterThreadsScheduled,
+                pool_size,
+                pool_size,
+                queue_size);
+        });
 
     return *shared->threadpool_writer;
 }
@@ -4856,10 +5045,7 @@ ReadSettings Context::getReadSettings() const
 
     /// Zero read buffer will not make progress.
     if (!settings.max_read_buffer_size)
-    {
-        throw Exception(ErrorCodes::INVALID_SETTING_VALUE,
-            "Invalid value '{}' for max_read_buffer_size", settings.max_read_buffer_size);
-    }
+        throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Invalid value '{}' for max_read_buffer_size", settings.max_read_buffer_size);
 
     res.local_fs_buffer_size
         = settings.max_read_buffer_size_local_fs ? settings.max_read_buffer_size_local_fs : settings.max_read_buffer_size;
diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.reference b/tests/queries/0_stateless/02931_max_num_to_warn.reference
index 76d86352bfc..c0ad7354039 100644
--- a/tests/queries/0_stateless/02931_max_num_to_warn.reference
+++ b/tests/queries/0_stateless/02931_max_num_to_warn.reference
@@ -1,3 +1,3 @@
-Attached tables is more than 10
-Attached databases is more than 10
-Active parts is more than 10
+The number of attached tables is more than 10
+The number of attached databases is more than 10
+The number of active parts is more than 10
diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.sql b/tests/queries/0_stateless/02931_max_num_to_warn.sql
index cd9a4ebe5fa..2e357a6080c 100644
--- a/tests/queries/0_stateless/02931_max_num_to_warn.sql
+++ b/tests/queries/0_stateless/02931_max_num_to_warn.sql
@@ -34,7 +34,7 @@ INSERT INTO test_max_num_to_warn_9 VALUES (1, 'Hello');
 INSERT INTO test_max_num_to_warn_10 VALUES (1, 'Hello');
 INSERT INTO test_max_num_to_warn_11 VALUES (1, 'Hello');
 
-SELECT * FROM system.warnings where message in ('Attached tables is more than 10', 'Attached databases is more than 10', 'Active parts is more than 10');
+SELECT * FROM system.warnings where message in ('The number of attached tables is more than 10', 'The number of attached databases is more than 10', 'The number of active parts is more than 10');
 
 DROP TABLE test_max_num_to_warn_1;
 DROP TABLE test_max_num_to_warn_2;

From 8ea9403d0f6072427f0bf61ec2a963d9c65726b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Tue, 5 Dec 2023 11:39:54 +0800
Subject: [PATCH 060/213] modify docs

---
 .../operations/server-configuration-parameters/settings.md  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 3c2604aed0f..8d5f0dd3c50 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -475,7 +475,7 @@ The value 0 means that you can delete all tables without any restrictions.
   
 
 ## max\_database\_num\_to\_warn {#max-database-num-to-warn}  
-If the number of attached databases exceeds the specified value, clickhouse server will add warning message to `system.warnings` table.    
+If the number of attached databases exceeds the specified value, the ClickHouse server will add warning messages to the `system.warnings` table.    
 Default value: 1000
 
 **Example**
@@ -485,7 +485,7 @@ Default value: 1000
 ```
   
 ## max\_table\_num\_to\_warn {#max-table-num-to-warn}   
-If the number of attached tables exceeds the specified value, clickhouse server will add warning message to `system.warnings` table.  
+If the number of attached tables exceeds the specified value, the ClickHouse server will add warning messages to the `system.warnings` table.  
 Default value: 5000    
 
 **Example**
@@ -496,7 +496,7 @@ Default value: 5000
 
 
 ## max\_part\_num\_to\_warn {#max-part-num-to-warn}  
-If the number of active parts exceeds the specified value, clickhouse server will add warning message to `system.warnings` table.  
+If the number of active parts exceeds the specified value, the ClickHouse server will add warning messages to the `system.warnings` table.  
 Default value: 100000  
 
 **Example**

From c41511e51c47c0b0bd1c49a79540bb870fd6dea0 Mon Sep 17 00:00:00 2001
From: MikhailBurdukov <burdukvmikhail@gmail.com>
Date: Tue, 5 Dec 2023 15:11:32 +0000
Subject: [PATCH 061/213] ignore_on_cluster_for_grants

---
 .../Access/InterpreterGrantQuery.cpp          |  6 ++--
 .../removeOnClusterClauseIfNeeded.cpp         |  4 ++-
 .../integration/test_replicated_users/test.py | 35 +++++++++++++++++++
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp
index 45e8ba9ea0d..259c6b39524 100644
--- a/src/Interpreters/Access/InterpreterGrantQuery.cpp
+++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp
@@ -7,6 +7,7 @@
 #include <Access/RolesOrUsersSet.h>
 #include <Access/User.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/removeOnClusterClauseIfNeeded.h>
 #include <Interpreters/QueryLog.h>
 #include <Interpreters/executeDDLQueryOnCluster.h>
 #include <boost/range/algorithm/copy.hpp>
@@ -396,7 +397,8 @@ namespace
 
 BlockIO InterpreterGrantQuery::execute()
 {
-    auto & query = query_ptr->as<ASTGrantQuery &>();
+    const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext());
+    auto & query = updated_query->as<ASTGrantQuery &>();
 
     query.replaceCurrentUserTag(getContext()->getUserName());
     query.access_rights_elements.eraseNonGrantable();
@@ -430,7 +432,7 @@ BlockIO InterpreterGrantQuery::execute()
         current_user_access->checkGranteesAreAllowed(grantees);
         DDLQueryOnClusterParams params;
         params.access_to_check = std::move(required_access);
-        return executeDDLQueryOnCluster(query_ptr, getContext(), params);
+        return executeDDLQueryOnCluster(updated_query, getContext(), params);
     }
 
     /// Check if the current user has corresponding access rights granted with grant option.
diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp
index 7dc452a0fcb..da3930d62a6 100644
--- a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp
+++ b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp
@@ -14,6 +14,7 @@
 #include <Parsers/Access/ASTCreateSettingsProfileQuery.h>
 #include <Parsers/Access/ASTCreateUserQuery.h>
 #include <Parsers/Access/ASTDropAccessEntityQuery.h>
+#include <Parsers/Access/ASTGrantQuery.h>
 
 
 namespace DB
@@ -33,7 +34,8 @@ static bool isAccessControlQuery(const ASTPtr & query)
         || query->as<ASTCreateRoleQuery>()
         || query->as<ASTCreateRowPolicyQuery>()
         || query->as<ASTCreateSettingsProfileQuery>()
-        || query->as<ASTDropAccessEntityQuery>();
+        || query->as<ASTDropAccessEntityQuery>()
+        || query->as<ASTGrantQuery>();
 }
 
 ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, const WithoutOnClusterASTRewriteParams & params)
diff --git a/tests/integration/test_replicated_users/test.py b/tests/integration/test_replicated_users/test.py
index 489724ed4fb..e34495a0071 100644
--- a/tests/integration/test_replicated_users/test.py
+++ b/tests/integration/test_replicated_users/test.py
@@ -114,6 +114,41 @@ def test_create_replicated_on_cluster_ignore(started_cluster, entity):
     node1.query(f"DROP {entity.keyword} {entity.name} {entity.options}")
 
 
+@pytest.mark.parametrize(
+    "use_on_cluster",
+    [
+        pytest.param(False, id="Without_on_cluster"),
+        pytest.param(True, id="With_ignored_on_cluster"),
+    ],
+)
+def test_grant_revoke_replicated(started_cluster, use_on_cluster: bool):
+    node1.replace_config(
+        "/etc/clickhouse-server/users.d/users.xml",
+        inspect.cleandoc(
+            f"""
+            <clickhouse>
+                <profiles>
+                    <default>
+                        <ignore_on_cluster_for_replicated_access_entities_queries>{int(use_on_cluster)}</ignore_on_cluster_for_replicated_access_entities_queries>
+                    </default>
+                </profiles>
+            </clickhouse>
+            """
+        ),
+    )
+    node1.query("SYSTEM RELOAD CONFIG")
+    on_cluster = "ON CLUSTER default" if use_on_cluster else ""
+
+    node1.query(f"CREATE USER theuser {on_cluster}")
+
+    assert node1.query(f"GRANT {on_cluster} SELECT ON *.* to theuser") == ""
+
+    assert node2.query(f"SHOW GRANTS FOR theuser") == "GRANT SELECT ON *.* TO theuser\n"
+
+    assert node1.query(f"REVOKE {on_cluster} SELECT ON *.* from theuser") == ""
+    node1.query(f"DROP USER theuser {on_cluster}")
+
+
 @pytest.mark.parametrize("entity", entities, ids=get_entity_id)
 def test_create_replicated_if_not_exists_on_cluster(started_cluster, entity):
     node1.query(

From 89b937339845741c5a564c420d1d101cf8c62759 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Tue, 5 Dec 2023 18:17:47 +0000
Subject: [PATCH 062/213] More random after tests are successful

---
 tests/clickhouse-test | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 006cc71bb4c..b596ac2a5fc 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -555,12 +555,12 @@ class SettingsRandomizer:
         "prefer_localhost_replica": lambda: random.randint(0, 1),
         "max_block_size": lambda: random.randint(8000, 100000),
         "max_threads": lambda: random.randint(1, 64),
-        "optimize_append_index": lambda: random.randint(1, 1),
-        "optimize_if_chain_to_multiif": lambda: random.randint(1, 1),
-        "optimize_if_transform_strings_to_enum": lambda: random.randint(1, 1),
+        "optimize_append_index": lambda: random.randint(0, 1),
+        "optimize_if_chain_to_multiif": lambda: random.randint(0, 1),
+        "optimize_if_transform_strings_to_enum": lambda: random.randint(0, 1),
         "optimize_read_in_order": lambda: random.randint(0, 1),
-        "optimize_or_like_chain": lambda: random.randint(1, 1),
-        "optimize_substitute_columns": lambda: random.randint(1, 1),
+        "optimize_or_like_chain": lambda: random.randint(0, 1),
+        "optimize_substitute_columns": lambda: random.randint(0, 1),
         "enable_multiple_prewhere_read_steps": lambda: random.randint(0, 1),
         "read_in_order_two_level_merge_threshold": lambda: random.randint(0, 100),
         "optimize_aggregation_in_order": lambda: random.randint(0, 1),

From e9d9048903a7bfb1dd61129ea4d3107ac2f7ff85 Mon Sep 17 00:00:00 2001
From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com>
Date: Tue, 5 Dec 2023 17:14:10 -0400
Subject: [PATCH 063/213] Changes 'cannot run on cloud' message.

---
 .../table-engines/special/distributed.md      | 24 +++++++------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md
index c484d0803c3..6224c450ea2 100644
--- a/docs/en/engines/table-engines/special/distributed.md
+++ b/docs/en/engines/table-engines/special/distributed.md
@@ -6,12 +6,11 @@ slug: /en/engines/table-engines/special/distributed
 
 # Distributed Table Engine
 
-import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
+:::warning
+To create a distributed table engine in the cloud, you can use the [remote and remoteSecure](../../../sql-reference/table-functions/remote) table functions. The `Distributed(...)` syntax cannot be used in ClickHouse Cloud.
+:::
 
-<SelfManaged />
-
-Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers.
-Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.
+Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers. Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.
 
 ## Creating a Table {#distributed-creating-a-table}
 
@@ -26,6 +25,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 ```
 
 ### From a Table {#distributed-from-a-table}
+
 When the `Distributed` table is pointing to a table on the current server you can adopt that table's schema:
 
 ``` sql
@@ -52,7 +52,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
 
 Specifying the `sharding_key` is necessary for the following:
 
-- For `INSERTs` into a distributed table (as the table engine needs the `sharding_key` to determine how to split the data). However, if `insert_distributed_one_random_shard` setting is enabled, then `INSERTs` do not need the sharding key
+- For `INSERTs` into a distributed table (as the table engine needs the `sharding_key` to determine how to split the data). However, if `insert_distributed_one_random_shard` setting is enabled, then `INSERTs` do not need the sharding key.
 - For use with `optimize_skip_unused_shards` as the `sharding_key` is necessary to determine what shards should be queried
 
 #### policy_name
@@ -126,9 +126,7 @@ SETTINGS
     fsync_directories=0;
 ```
 
-Data will be read from all servers in the `logs` cluster, from the `default.hits` table located on every server in the cluster.
-Data is not only read but is partially processed on the remote servers (to the extent that this is possible).
-For example, for a query with `GROUP BY`, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.
+Data will be read from all servers in the `logs` cluster, from the `default.hits` table located on every server in the cluster. Data is not only read but is partially processed on the remote servers (to the extent that this is possible). For example, for a query with `GROUP BY`, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.
 
 Instead of the database name, you can use a constant expression that returns a string. For example: `currentDatabase()`.
 
@@ -187,9 +185,7 @@ Clusters are configured in the [server configuration file](../../../operations/c
 </remote_servers>
 ```
 
-Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas.
-Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards).
-Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas).
+Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas. Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards). Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas).
 
 Cluster names must not contain dots.
 
@@ -202,9 +198,7 @@ The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `com
 - `secure` - Whether to use a secure SSL/TLS connection. Usually also requires specifying the port (the default secure port is `9440`). The server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and be configured with correct certificates.
 - `compression` - Use data compression. Default value: `true`.
 
-When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting.
-If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times.
-This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly.
+When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting. If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times. This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly.
 
 You can specify just one of the shards (in this case, query processing should be called remote, rather than distributed) or up to any number of shards. In each shard, you can specify from one to any number of replicas. You can specify a different number of replicas for each shard.
 

From 806061642a4e45bae2d733d27b6c6482156f7360 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Wed, 6 Dec 2023 11:39:04 +0100
Subject: [PATCH 064/213] Support GENERATED and DEFAULT columns

---
 src/Core/ExternalResultDescription.cpp        |   5 +
 src/Core/ExternalResultDescription.h          |   3 +
 src/Core/PostgreSQL/insertPostgreSQLValue.cpp |   8 +-
 src/Core/PostgreSQL/insertPostgreSQLValue.h   |   2 +-
 .../fetchPostgreSQLTableStructure.cpp         |  71 +++-
 .../fetchPostgreSQLTableStructure.h           |   6 +-
 .../MaterializedPostgreSQLConsumer.cpp        | 331 ++++++++++--------
 .../MaterializedPostgreSQLConsumer.h          |  48 +--
 .../PostgreSQLReplicationHandler.cpp          |  19 +-
 .../StorageMaterializedPostgreSQL.cpp         |  38 +-
 .../StorageMaterializedPostgreSQL.h           |   3 +-
 .../test.py                                   | 144 ++++++++
 12 files changed, 471 insertions(+), 207 deletions(-)

diff --git a/src/Core/ExternalResultDescription.cpp b/src/Core/ExternalResultDescription.cpp
index 0700200a9ec..f7e8a69d355 100644
--- a/src/Core/ExternalResultDescription.cpp
+++ b/src/Core/ExternalResultDescription.cpp
@@ -20,6 +20,11 @@ namespace ErrorCodes
     extern const int UNKNOWN_TYPE;
 }
 
+ExternalResultDescription::ExternalResultDescription(const Block & sample_block_)
+{
+    init(sample_block_);
+}
+
 void ExternalResultDescription::init(const Block & sample_block_)
 {
     sample_block = sample_block_;
diff --git a/src/Core/ExternalResultDescription.h b/src/Core/ExternalResultDescription.h
index a9ffe8b2ed2..b7d852b99cf 100644
--- a/src/Core/ExternalResultDescription.h
+++ b/src/Core/ExternalResultDescription.h
@@ -41,6 +41,9 @@ struct ExternalResultDescription
     Block sample_block;
     std::vector<std::pair<ValueType, bool /* is_nullable */>> types;
 
+    ExternalResultDescription() = default;
+    explicit ExternalResultDescription(const Block & sample_block_);
+
     void init(const Block & sample_block_);
 };
 
diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp
index d2e8071c5de..2f041134f06 100644
--- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp
+++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp
@@ -36,7 +36,7 @@ void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_colum
 void insertPostgreSQLValue(
         IColumn & column, std::string_view value,
         const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
-        std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx)
+        const std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx)
 {
     switch (type)
     {
@@ -125,8 +125,8 @@ void insertPostgreSQLValue(
             pqxx::array_parser parser{value};
             std::pair<pqxx::array_parser::juncture, std::string> parsed = parser.get_next();
 
-            size_t dimension = 0, max_dimension = 0, expected_dimensions = array_info[idx].num_dimensions;
-            const auto parse_value = array_info[idx].pqxx_parser;
+            size_t dimension = 0, max_dimension = 0, expected_dimensions = array_info.at(idx).num_dimensions;
+            const auto parse_value = array_info.at(idx).pqxx_parser;
             std::vector<Row> dimensions(expected_dimensions + 1);
 
             while (parsed.first != pqxx::array_parser::juncture::done)
@@ -138,7 +138,7 @@ void insertPostgreSQLValue(
                     dimensions[dimension].emplace_back(parse_value(parsed.second));
 
                 else if (parsed.first == pqxx::array_parser::juncture::null_value)
-                    dimensions[dimension].emplace_back(array_info[idx].default_value);
+                    dimensions[dimension].emplace_back(array_info.at(idx).default_value);
 
                 else if (parsed.first == pqxx::array_parser::juncture::row_end)
                 {
diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.h b/src/Core/PostgreSQL/insertPostgreSQLValue.h
index b842d86ed47..3bc83292b96 100644
--- a/src/Core/PostgreSQL/insertPostgreSQLValue.h
+++ b/src/Core/PostgreSQL/insertPostgreSQLValue.h
@@ -23,7 +23,7 @@ struct PostgreSQLArrayInfo
 void insertPostgreSQLValue(
         IColumn & column, std::string_view value,
         const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
-        std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx);
+        const std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx);
 
 void preparePostgreSQLArrayInfo(
         std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type);
diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
index 2c97c92ba99..527936f1c19 100644
--- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
+++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
@@ -186,20 +186,25 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList(
             }
             else
             {
-                std::tuple<std::string, std::string, std::string, uint16_t, std::string, std::string> row;
+                std::tuple<std::string, std::string, std::string, uint16_t, std::string, std::string, std::string> row;
                 while (stream >> row)
                 {
-                    auto data_type = convertPostgreSQLDataType(
+                    const auto column_name = std::get<0>(row);
+                    const auto data_type = convertPostgreSQLDataType(
                         std::get<1>(row), recheck_array,
                         use_nulls && (std::get<2>(row) == /* not nullable */"f"),
                         std::get<3>(row));
 
-                    columns.push_back(NameAndTypePair(std::get<0>(row), data_type));
+                    columns.push_back(NameAndTypePair(column_name, data_type));
+                    /// attgenerated is 's' for a stored generated column; it can also arrive empty.
+                    const auto & attgenerated = std::get<6>(row);
 
-                    attributes.emplace_back(
-                    PostgreSQLTableStructure::PGAttribute{
-                        .atttypid = parse<int>(std::get<4>(row)),
-                        .atttypmod = parse<int>(std::get<5>(row)),
+                    attributes.emplace(
+                        column_name,
+                        PostgreSQLTableStructure::PGAttribute{
+                            .atttypid = parse<int>(std::get<4>(row)),
+                            .atttypmod = parse<int>(std::get<5>(row)),
+                            .attgenerated = attgenerated.empty() ? char{} : char(attgenerated[0])
                     });
 
                     ++i;
@@ -253,14 +258,19 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
     PostgreSQLTableStructure table;
 
     auto where = fmt::format("relname = {}", quoteString(postgres_table));
-    if (postgres_schema.empty())
-        where += " AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'public')";
-    else
-        where += fmt::format(" AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = {})", quoteString(postgres_schema));
+
+    where += postgres_schema.empty()
+        ? " AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'public')"
+        : fmt::format(" AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = {})", quoteString(postgres_schema));
 
     std::string query = fmt::format(
-           "SELECT attname AS name, format_type(atttypid, atttypmod) AS type, "
-           "attnotnull AS not_null, attndims AS dims, atttypid as type_id, atttypmod as type_modifier "
+           "SELECT attname AS name, " /// column name
+           "format_type(atttypid, atttypmod) AS type, " /// data type
+           "attnotnull AS not_null, " /// true if the column is declared NOT NULL (i.e. it is not nullable)
+           "attndims AS dims, " /// array dimensions
+           "atttypid as type_id, " /// data type id, compared later with the id from the replication stream
+           "atttypmod as type_modifier, " /// type modifier, for example n in varchar(n)
+           "attgenerated as generated " /// if column has GENERATED
            "FROM pg_attribute "
            "WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) "
            "AND NOT attisdropped AND attnum > 0", where);
@@ -271,11 +281,44 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
     if (!table.physical_columns)
         throw Exception(ErrorCodes::UNKNOWN_TABLE, "PostgreSQL table {} does not exist", postgres_table_with_schema);
 
+    /// Keep the column names in fetch order: pg_attrdef rows are mapped back to columns by position below.
+    table.physical_columns->names.reserve(table.physical_columns->columns.size());
+    for (const auto & column : table.physical_columns->columns)
+        table.physical_columns->names.push_back(column.name);
+
+    const bool check_generated = std::any_of(
+        table.physical_columns->attributes.begin(),
+        table.physical_columns->attributes.end(),
+        [](const auto & attr){ return attr.second.attgenerated == 's'; });
+
+    if (check_generated)
+    {
+        std::string attrdef_query = fmt::format(
+            "SELECT adnum, pg_get_expr(adbin, adrelid) as generated_expression "
+            "FROM pg_attrdef "
+            "WHERE adrelid = (SELECT oid FROM pg_class WHERE {});", where);
+
+        pqxx::result result{tx.exec(attrdef_query)};
+        for (const auto row : result)
+        {
+            size_t adnum = row[0].as<int>();
+            if (!adnum || adnum > table.physical_columns->names.size())
+            {
+                throw Exception(ErrorCodes::LOGICAL_ERROR,
+                                "Received adnum {}, but currently fetched columns list has {} columns",
+                                adnum, table.physical_columns->names.size());
+            }
+            const auto & column_name = table.physical_columns->names[adnum - 1]; /// NOTE(review): treats adnum as a 1-based position in the fetched list; dropped columns would shift it — confirm
+            table.physical_columns->attributes.at(column_name).attr_def = row[1].as<std::string>();
+        }
+    }
+
     if (with_primary_key)
     {
         /// wiki.postgresql.org/wiki/Retrieve_primary_key_columns
         query = fmt::format(
-                "SELECT a.attname, format_type(a.atttypid, a.atttypmod) AS data_type "
+                "SELECT a.attname, " /// column name
+                "format_type(a.atttypid, a.atttypmod) AS data_type " /// data type
                 "FROM pg_index i "
                 "JOIN pg_attribute a ON a.attrelid = i.indrelid "
                 "AND a.attnum = ANY(i.indkey) "
diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h
index 7cd21d353a2..81bf7b278fc 100644
--- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h
+++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h
@@ -16,13 +16,17 @@ struct PostgreSQLTableStructure
     {
         Int32 atttypid;
         Int32 atttypmod;
+        bool atthasdef; /// NOTE(review): not assigned anywhere in the visible part of this patch — confirm it is needed
+        char attgenerated; /// 's' is what fetchPostgreSQLTableStructure looks for before fetching generated expressions
+        std::string attr_def; /// text returned by pg_get_expr(adbin, adrelid) for the column, when it was fetched
     };
-    using Attributes = std::vector<PGAttribute>;
+    using Attributes = std::unordered_map<std::string, PGAttribute>; /// keyed by column name
 
     struct ColumnsInfo
     {
         NamesAndTypesList columns;
         Attributes attributes;
+        std::vector<std::string> names; /// column names in the order of `columns`, filled after construction
         ColumnsInfo(NamesAndTypesList && columns_, Attributes && attributes_) : columns(columns_), attributes(attributes_) {}
     };
     using ColumnsInfoPtr = std::shared_ptr<ColumnsInfo>;
diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
index e7bd6be7b2b..6be1563d16c 100644
--- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
+++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
@@ -24,6 +24,22 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }
 
+namespace
+{
+    using ArrayInfo = std::unordered_map<size_t, PostgreSQLArrayInfo>;
+
+    /// Prepares array parsing info for every vtArray column, using its position in `columns_description`.
+    ArrayInfo createArrayInfos(const NamesAndTypesList & columns, const ExternalResultDescription & columns_description)
+    {
+        ArrayInfo result;
+        for (size_t pos = 0, total = columns.size(); pos < total; ++pos)
+            if (columns_description.types[pos].first == ExternalResultDescription::ValueType::vtArray)
+                preparePostgreSQLArrayInfo(result, pos, columns_description.sample_block.getByPosition(pos).type);
+
+        return result;
+    }
+}
+
 MaterializedPostgreSQLConsumer::MaterializedPostgreSQLConsumer(
     ContextPtr context_,
     std::shared_ptr<postgres::Connection> connection_,
@@ -40,126 +56,160 @@ MaterializedPostgreSQLConsumer::MaterializedPostgreSQLConsumer(
     , publication_name(publication_name_)
     , connection(connection_)
     , current_lsn(start_lsn)
+    , final_lsn(start_lsn)
     , lsn_value(getLSNValue(start_lsn))
     , max_block_size(max_block_size_)
     , schema_as_a_part_of_table_name(schema_as_a_part_of_table_name_)
 {
-    final_lsn = start_lsn;
-    auto tx = std::make_shared<pqxx::nontransaction>(connection->getRef());
-    current_lsn = advanceLSN(tx);
-    LOG_TRACE(log, "Starting replication. LSN: {} (last: {})", getLSNValue(current_lsn), getLSNValue(final_lsn));
-    tx->commit();
-
-    for (const auto & [table_name, storage_info] : storages_info_)
-        storages.emplace(table_name, storage_info);
-}
-
-
-MaterializedPostgreSQLConsumer::StorageData::StorageData(const StorageInfo & storage_info)
-    : storage(storage_info.storage), buffer(storage_info.storage->getInMemoryMetadataPtr(), storage_info.attributes)
-{
-    auto table_id = storage_info.storage->getStorageID();
-    LOG_TRACE(&Poco::Logger::get("StorageMaterializedPostgreSQL"),
-              "New buffer for table {}, number of attributes: {}, number if columns: {}, structure: {}",
-              table_id.getNameForLogs(), buffer.attributes.size(), buffer.getColumnsNum(), buffer.description.sample_block.dumpStructure());
-}
-
-
-MaterializedPostgreSQLConsumer::StorageData::Buffer::Buffer(
-    StorageMetadataPtr storage_metadata, const PostgreSQLTableStructure::Attributes & attributes_)
-    : attributes(attributes_)
-{
-    const Block sample_block = storage_metadata->getSampleBlock();
-
-    /// Need to clear type, because in description.init() the types are appended
-    description.types.clear();
-    description.init(sample_block);
-
-    columns = description.sample_block.cloneEmptyColumns();
-    const auto & storage_columns = storage_metadata->getColumns().getAllPhysical();
-    auto insert_columns = std::make_shared<ASTExpressionList>();
-
-    auto columns_num = description.sample_block.columns();
-    assert(columns_num == storage_columns.size());
-    if (attributes.size() + 2 != columns_num) /// +2 because sign and version columns
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns number mismatch. Attributes: {}, buffer: {}",
-                        attributes.size(), columns_num);
-
-    size_t idx = 0;
-    for (const auto & column : storage_columns)
     {
-        if (description.types[idx].first == ExternalResultDescription::ValueType::vtArray)
-            preparePostgreSQLArrayInfo(array_info, idx, description.sample_block.getByPosition(idx).type);
-        idx++;
-
-        insert_columns->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
+        auto tx = std::make_shared<pqxx::nontransaction>(connection->getRef());
+        current_lsn = advanceLSN(tx);
+        tx->commit();
     }
 
-    columns_ast = std::move(insert_columns);
+    for (const auto & [table_name, storage_info] : storages_info_)
+        storages.emplace(table_name, StorageData(storage_info, log));
+
+    LOG_TRACE(log, "Starting replication. LSN: {} (last: {}), storages: {}",
+              getLSNValue(current_lsn), getLSNValue(final_lsn), storages.size());
 }
 
 
-void MaterializedPostgreSQLConsumer::assertCorrectInsertion(StorageData::Buffer & buffer, size_t column_idx)
+MaterializedPostgreSQLConsumer::StorageData::StorageData(const StorageInfo & storage_info, Poco::Logger * log_)
+    : storage(storage_info.storage)
+    , table_description(storage_info.storage->getInMemoryMetadataPtr()->getSampleBlock()) /// description of the full table structure
+    , columns_attributes(storage_info.attributes)
+    , array_info(createArrayInfos(storage_info.storage->getInMemoryMetadataPtr()->getColumns().getAllPhysical(), table_description)) /// reads table_description — NOTE(review): confirm it is declared before array_info in the header
 {
-    if (column_idx >= buffer.description.sample_block.columns()
-        || column_idx >= buffer.description.types.size()
-        || column_idx >= buffer.columns.size())
-        throw Exception(
-                        ErrorCodes::LOGICAL_ERROR,
+    auto columns_num = table_description.sample_block.columns();
+    /// The table sample block has two extra columns, _sign and _version, that have no PostgreSQL attributes.
+    if (columns_attributes.size() + 2 != columns_num)
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR,
+                        "Columns number mismatch. Attributes: {}, buffer: {}",
+                        columns_attributes.size(), columns_num);
+    }
+
+    LOG_TRACE(log_, "Adding definition for table {}, structure: {}",
+              storage_info.storage->getStorageID().getNameForLogs(),
+              table_description.sample_block.dumpStructure());
+}
+
+MaterializedPostgreSQLConsumer::StorageData::Buffer::Buffer(
+    ColumnsWithTypeAndName && columns_,
+    const ExternalResultDescription & table_description_)
+{
+    /// `columns_` come from the replication message; the service columns are appended to them below.
+    if (std::any_of(columns_.begin(), columns_.end(),
+            [](const auto & col) { return col.name == "_sign" || col.name == "_version"; }))
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR,
+                        "PostgreSQL table cannot contain `_sign` or `_version` columns "
+                        "as they are reserved for internal usage");
+    }
+
+    columns_.push_back(table_description_.sample_block.getByName("_sign"));
+    columns_.push_back(table_description_.sample_block.getByName("_version"));
+
+    for (const auto & col : columns_)
+    {
+        if (!table_description_.sample_block.has(col.name))
+        {
+            throw Exception(ErrorCodes::LOGICAL_ERROR,
+                            "Having column {}, but no such column in table ({})",
+                            col.name, table_description_.sample_block.dumpStructure());
+        }
+        /// Compare the types themselves: equal types may be held by different DataTypePtr objects.
+        const auto & actual_column = table_description_.sample_block.getByName(col.name);
+        if (!col.type->equals(*actual_column.type))
+        {
+            throw Exception(ErrorCodes::LOGICAL_ERROR,
+                            "Having column {} of type {}, but expected {}",
+                            col.name, col.type->getName(), actual_column.type->getName());
+        }
+    }
+
+    sample_block = Block(columns_);
+    columns = sample_block.cloneEmptyColumns();
+
+    for (const auto & name : sample_block.getNames())
+        columns_ast.children.emplace_back(std::make_shared<ASTIdentifier>(name));
+}
+
+MaterializedPostgreSQLConsumer::StorageData::Buffer & MaterializedPostgreSQLConsumer::StorageData::getBuffer()
+{
+    /// The buffer is installed through setBuffer() (see processReplicationMessage);
+    /// asking for it earlier is reported as a logical error.
+    if (buffer)
+        return *buffer;
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Data buffer not initialized for {}",
+                    storage->getStorageID().getNameForLogs());
+}
+
+void MaterializedPostgreSQLConsumer::StorageData::Buffer::assertInsertIsPossible(size_t col_idx) const
+{
+    if (col_idx >= columns.size()) /// `columns` is created from `sample_block`, so this bounds both
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR,
                         "Attempt to insert into buffer at position: "
-                        "{}, but block columns size is {}, types size: {}, columns size: {}, buffer structure: {}",
-                        column_idx,
-                        buffer.description.sample_block.columns(),
-                        buffer.description.types.size(), buffer.columns.size(),
-                        buffer.description.sample_block.dumpStructure());
+                        "{}, but block columns size is {} (full structure: {})",
+                        col_idx, columns.size(), sample_block.dumpStructure());
+    }
 }
 
 
-void MaterializedPostgreSQLConsumer::insertValue(StorageData::Buffer & buffer, const std::string & value, size_t column_idx)
+void MaterializedPostgreSQLConsumer::insertValue(StorageData & storage_data, const std::string & value, size_t column_idx)
 {
-    assertCorrectInsertion(buffer, column_idx);
+    auto & buffer = storage_data.getBuffer(); /// throws LOGICAL_ERROR if no buffer was set for this table
+    buffer.assertInsertIsPossible(column_idx);
 
-    const auto & sample = buffer.description.sample_block.getByPosition(column_idx);
-    bool is_nullable = buffer.description.types[column_idx].second;
+    const auto & column_type_and_name = buffer.sample_block.getByPosition(column_idx);
+    auto & column = buffer.columns[column_idx];
+    /// Value types and array info are indexed by position in the table description, so translate the index by name.
+    const size_t column_idx_in_table = storage_data.table_description.sample_block.getPositionByName(column_type_and_name.name);
+    const auto & type_description = storage_data.table_description.types[column_idx_in_table];
 
     try
     {
-        if (is_nullable)
+        if (column_type_and_name.type->isNullable())
         {
-            ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*buffer.columns[column_idx]);
-            const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
+            ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*column);
+            const auto & data_type = assert_cast<const DataTypeNullable &>(*column_type_and_name.type);
 
             insertPostgreSQLValue(
-                    column_nullable.getNestedColumn(), value,
-                    buffer.description.types[column_idx].first, data_type.getNestedType(), buffer.array_info, column_idx);
+                    column_nullable.getNestedColumn(), value, type_description.first,
+                    data_type.getNestedType(), storage_data.array_info, column_idx_in_table);
 
             column_nullable.getNullMapData().emplace_back(0);
         }
         else
         {
             insertPostgreSQLValue(
-                    *buffer.columns[column_idx], value,
-                    buffer.description.types[column_idx].first, sample.type,
-                    buffer.array_info, column_idx);
+                *column, value, type_description.first, column_type_and_name.type,
+                storage_data.array_info, column_idx_in_table);
         }
     }
     catch (const pqxx::conversion_error & e)
     {
-        LOG_ERROR(log, "Conversion failed while inserting PostgreSQL value {}, will insert default value. Error: {}", value, e.what());
-        insertDefaultValue(buffer, column_idx);
+        LOG_ERROR(log, "Conversion failed while inserting PostgreSQL value {}, "
+                  "will insert default value. Error: {}", value, e.what());
+        /// Same fallback call as in insertDefaultValue(), without repeating the buffer checks.
+        insertDefaultPostgreSQLValue(*column, *column_type_and_name.column);
     }
 }
 
-
-void MaterializedPostgreSQLConsumer::insertDefaultValue(StorageData::Buffer & buffer, size_t column_idx)
+void MaterializedPostgreSQLConsumer::insertDefaultValue(StorageData & storage_data, size_t column_idx)
 {
-    assertCorrectInsertion(buffer, column_idx);
+    auto & buffer = storage_data.getBuffer(); /// throws LOGICAL_ERROR if no buffer was set for this table
+    buffer.assertInsertIsPossible(column_idx);
 
-    const auto & sample = buffer.description.sample_block.getByPosition(column_idx);
-    insertDefaultPostgreSQLValue(*buffer.columns[column_idx], *sample.column);
+    const auto & column_type_and_name = buffer.sample_block.getByPosition(column_idx);
+    auto & column = buffer.columns[column_idx];
+    /// `column_idx` is a position in the buffer, so the destination and the sample column are both taken from it.
+    insertDefaultPostgreSQLValue(*column, *column_type_and_name.column);
 }
 
-
 void MaterializedPostgreSQLConsumer::readString(const char * message, size_t & pos, size_t size, String & result)
 {
     assert(size > pos + 2);
@@ -173,7 +223,6 @@ void MaterializedPostgreSQLConsumer::readString(const char * message, size_t & p
     }
 }
 
-
 template<typename T>
 T MaterializedPostgreSQLConsumer::unhexN(const char * message, size_t pos, size_t n)
 {
@@ -186,7 +235,6 @@ T MaterializedPostgreSQLConsumer::unhexN(const char * message, size_t pos, size_
     return result;
 }
 
-
 Int64 MaterializedPostgreSQLConsumer::readInt64(const char * message, size_t & pos, [[maybe_unused]] size_t size)
 {
     assert(size >= pos + 16);
@@ -195,7 +243,6 @@ Int64 MaterializedPostgreSQLConsumer::readInt64(const char * message, size_t & p
     return result;
 }
 
-
 Int32 MaterializedPostgreSQLConsumer::readInt32(const char * message, size_t & pos, [[maybe_unused]] size_t size)
 {
     assert(size >= pos + 8);
@@ -204,7 +251,6 @@ Int32 MaterializedPostgreSQLConsumer::readInt32(const char * message, size_t & p
     return result;
 }
 
-
 Int16 MaterializedPostgreSQLConsumer::readInt16(const char * message, size_t & pos, [[maybe_unused]] size_t size)
 {
     assert(size >= pos + 4);
@@ -213,7 +259,6 @@ Int16 MaterializedPostgreSQLConsumer::readInt16(const char * message, size_t & p
     return result;
 }
 
-
 Int8 MaterializedPostgreSQLConsumer::readInt8(const char * message, size_t & pos, [[maybe_unused]] size_t size)
 {
     assert(size >= pos + 2);
@@ -222,25 +267,23 @@ Int8 MaterializedPostgreSQLConsumer::readInt8(const char * message, size_t & pos
     return result;
 }
 
-
 void MaterializedPostgreSQLConsumer::readTupleData(
-        StorageData::Buffer & buffer, const char * message, size_t & pos, [[maybe_unused]] size_t size, PostgreSQLQuery type, bool old_value)
+    StorageData & storage_data,
+    const char * message,
+    size_t & pos,
+    size_t size,
+    PostgreSQLQuery type,
+    bool old_value)
 {
     Int16 num_columns = readInt16(message, pos, size);
 
-    /// Sanity check. In fact, it was already checked.
-    if (static_cast<size_t>(num_columns) + 2 != buffer.getColumnsNum()) /// +2 -- sign and version columns
-        throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR,
-                        "Number of columns does not match. Got: {}, expected {}, current buffer structure: {}",
-                        num_columns, buffer.getColumnsNum(), buffer.description.sample_block.dumpStructure());
-
     auto proccess_column_value = [&](Int8 identifier, Int16 column_idx)
     {
         switch (identifier) // NOLINT(bugprone-switch-missing-default-case)
         {
             case 'n': /// NULL
             {
-                insertDefaultValue(buffer, column_idx);
+                insertDefaultValue(storage_data, column_idx);
                 break;
             }
             case 't': /// Text formatted value
@@ -250,7 +293,7 @@ void MaterializedPostgreSQLConsumer::readTupleData(
                 for (Int32 i = 0; i < col_len; ++i)
                     value += readInt8(message, pos, size);
 
-                insertValue(buffer, value, column_idx);
+                insertValue(storage_data, value, column_idx);
                 break;
             }
             case 'u': /// TOAST value && unchanged at the same time. Actual value is not sent.
@@ -258,13 +301,13 @@ void MaterializedPostgreSQLConsumer::readTupleData(
                 /// TOAST values are not supported. (TOAST values are values that are considered in postgres
                 /// to be too large to be stored directly)
                 LOG_WARNING(log, "Got TOAST value, which is not supported, default value will be used instead.");
-                insertDefaultValue(buffer, column_idx);
+                insertDefaultValue(storage_data, column_idx);
                 break;
             }
             case 'b': /// Binary data.
             {
                 LOG_WARNING(log, "We do not yet process this format of data, will insert default value");
-                insertDefaultValue(buffer, column_idx);
+                insertDefaultValue(storage_data, column_idx);
                 break;
             }
             default:
@@ -272,7 +315,7 @@ void MaterializedPostgreSQLConsumer::readTupleData(
                 LOG_WARNING(log, "Unexpected identifier: {}. This is a bug! Please report an issue on github", identifier);
                 chassert(false);
 
-                insertDefaultValue(buffer, column_idx);
+                insertDefaultValue(storage_data, column_idx);
                 break;
             }
         }
@@ -291,7 +334,7 @@ void MaterializedPostgreSQLConsumer::readTupleData(
                       "Got error while receiving value for column {}, will insert default value. Error: {}",
                       column_idx, getCurrentExceptionMessage(true));
 
-            insertDefaultValue(buffer, column_idx);
+            insertDefaultValue(storage_data, column_idx);
             /// Let's collect only the first exception.
             /// This delaying of error throw is needed because
             /// some errors can be ignored and just logged,
@@ -301,19 +344,20 @@ void MaterializedPostgreSQLConsumer::readTupleData(
         }
     }
 
+    auto & columns = storage_data.getBuffer().columns; /// NOTE(review): [num_columns] and [num_columns + 1] are presumably _sign and _version (appended last in Buffer); num_columns is not range-checked here
     switch (type)
     {
         case PostgreSQLQuery::INSERT:
         {
-            buffer.columns[num_columns]->insert(static_cast<Int8>(1));
-            buffer.columns[num_columns + 1]->insert(lsn_value);
+            columns[num_columns]->insert(static_cast<Int8>(1));
+            columns[num_columns + 1]->insert(lsn_value);
 
             break;
         }
         case PostgreSQLQuery::DELETE:
         {
-            buffer.columns[num_columns]->insert(static_cast<Int8>(-1));
-            buffer.columns[num_columns + 1]->insert(lsn_value);
+            columns[num_columns]->insert(static_cast<Int8>(-1));
+            columns[num_columns + 1]->insert(lsn_value);
 
             break;
         }
@@ -321,11 +365,11 @@ void MaterializedPostgreSQLConsumer::readTupleData(
         {
             /// Process old value in case changed value is a primary key.
             if (old_value)
-                buffer.columns[num_columns]->insert(static_cast<Int8>(-1));
+                columns[num_columns]->insert(static_cast<Int8>(-1));
             else
-                buffer.columns[num_columns]->insert(static_cast<Int8>(1));
+                columns[num_columns]->insert(static_cast<Int8>(1));
 
-            buffer.columns[num_columns + 1]->insert(lsn_value);
+            columns[num_columns + 1]->insert(lsn_value);
 
             break;
         }
@@ -335,7 +379,6 @@ void MaterializedPostgreSQLConsumer::readTupleData(
         std::rethrow_exception(error);
 }
 
-
 /// https://www.postgresql.org/docs/13/protocol-logicalrep-message-formats.html
 void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * replication_message, size_t size)
 {
@@ -366,10 +409,10 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
                 return;
 
             Int8 new_tuple = readInt8(replication_message, pos, size);
-            auto & buffer = storages.find(table_name)->second.buffer;
+            auto & storage_data = storages.find(table_name)->second;
 
             if (new_tuple)
-                readTupleData(buffer, replication_message, pos, size, PostgreSQLQuery::INSERT);
+                readTupleData(storage_data, replication_message, pos, size, PostgreSQLQuery::INSERT);
 
             break;
         }
@@ -386,7 +429,7 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
             if (!isSyncAllowed(relation_id, table_name))
                 return;
 
-            auto & buffer = storages.find(table_name)->second.buffer;
+            auto & storage_data = storages.find(table_name)->second;
 
             auto proccess_identifier = [&](Int8 identifier) -> bool
             {
@@ -401,13 +444,13 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
                     /// it is much more efficient to use replica identity index, but support all possible cases.
                     case 'O':
                     {
-                        readTupleData(buffer, replication_message, pos, size, PostgreSQLQuery::UPDATE, true);
+                        readTupleData(storage_data, replication_message, pos, size, PostgreSQLQuery::UPDATE, true);
                         break;
                     }
                     case 'N':
                     {
                         /// New row.
-                        readTupleData(buffer, replication_message, pos, size, PostgreSQLQuery::UPDATE);
+                        readTupleData(storage_data, replication_message, pos, size, PostgreSQLQuery::UPDATE);
                         read_next = false;
                         break;
                     }
@@ -441,8 +484,8 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
              /// 0 or 1 if replica identity is set to full. For now only default replica identity is supported (with primary keys).
             readInt8(replication_message, pos, size);
 
-            auto & buffer = storages.find(table_name)->second.buffer;
-            readTupleData(buffer, replication_message, pos, size, PostgreSQLQuery::DELETE);
+            auto & storage_data = storages.find(table_name)->second;
+            readTupleData(storage_data, replication_message, pos, size, PostgreSQLQuery::DELETE);
             break;
         }
         case 'C': // Commit
@@ -490,8 +533,6 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
                 return;
             }
 
-            auto & buffer = storage_iter->second.buffer;
-
             /// 'd' - default (primary key if any)
             /// 'n' - nothing
             /// 'f' - all columns (set replica identity full)
@@ -509,29 +550,13 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
 
             Int16 num_columns = readInt16(replication_message, pos, size);
 
-            if (static_cast<size_t>(num_columns) + 2 != buffer.getColumnsNum()) /// +2 -- sign and version columns
-            {
-                markTableAsSkipped(relation_id, table_name);
-                return;
-            }
-
-            if (static_cast<size_t>(num_columns) != buffer.attributes.size())
-            {
-#ifndef NDEBUG
-                throw Exception(ErrorCodes::LOGICAL_ERROR,
-                                "Mismatch in attributes size. Got {}, expected {}. It's a bug. Current buffer structure: {}",
-                                num_columns, buffer.attributes.size(), buffer.description.sample_block.dumpStructure());
-#else
-                LOG_ERROR(log, "Mismatch in attributes size. Got {}, expected {}. It's a bug. Current buffer structure: {}",
-                          num_columns, buffer.attributes.size(), buffer.description.sample_block.dumpStructure());
-                markTableAsSkipped(relation_id, table_name);
-                return;
-#endif
-            }
-
             Int32 data_type_id;
             Int32 type_modifier; /// For example, n in varchar(n)
 
+            auto & storage_data = storage_iter->second;
+            const auto & description = storage_data.table_description;
+
+            ColumnsWithTypeAndName columns;
             for (uint16_t i = 0; i < num_columns; ++i)
             {
                 String column_name;
@@ -541,13 +566,22 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
                 data_type_id = readInt32(replication_message, pos, size);
                 type_modifier = readInt32(replication_message, pos, size);
 
-                if (buffer.attributes[i].atttypid != data_type_id || buffer.attributes[i].atttypmod != type_modifier)
+                columns.push_back(description.sample_block.getByName(column_name));
+
+                const auto & attributes_it = storage_data.columns_attributes.find(column_name);
+                if (attributes_it == storage_data.columns_attributes.end())
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column: {}", column_name);
+
+                const auto & attributes = attributes_it->second;
+                if (attributes.atttypid != data_type_id || attributes.atttypmod != type_modifier)
                 {
+                    LOG_TEST(log, "Column {} has a different type", column_name);
                     markTableAsSkipped(relation_id, table_name);
                     return;
                 }
             }
 
+            storage_data.setBuffer(std::make_unique<StorageData::Buffer>(std::move(columns), description));
             tables_to_sync.insert(table_name);
             break;
         }
@@ -563,7 +597,6 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
     }
 }
 
-
 void MaterializedPostgreSQLConsumer::syncTables()
 {
     size_t synced_tables = 0;
@@ -571,8 +604,8 @@ void MaterializedPostgreSQLConsumer::syncTables()
     {
         auto table_name = *tables_to_sync.begin();
         auto & storage_data = storages.find(table_name)->second;
-        Block result_rows = storage_data.buffer.description.sample_block.cloneWithColumns(std::move(storage_data.buffer.columns));
-        storage_data.buffer.columns = storage_data.buffer.description.sample_block.cloneEmptyColumns();
+        auto & buffer = storage_data.getBuffer();
+        Block result_rows = buffer.sample_block.cloneWithColumns(std::move(buffer.columns));
 
         try
         {
@@ -585,7 +618,7 @@ void MaterializedPostgreSQLConsumer::syncTables()
 
                 auto insert = std::make_shared<ASTInsertQuery>();
                 insert->table_id = storage->getStorageID();
-                insert->columns = storage_data.buffer.columns_ast;
+                insert->columns = std::make_shared<ASTExpressionList>(buffer.columns_ast);
 
                 InterpreterInsertQuery interpreter(insert, insert_context, true);
                 auto io = interpreter.execute();
@@ -603,10 +636,11 @@ void MaterializedPostgreSQLConsumer::syncTables()
         catch (...)
         {
             /// Retry this buffer later.
-            storage_data.buffer.columns = result_rows.mutateColumns();
+            buffer.columns = result_rows.mutateColumns();
             throw;
         }
 
+        storage_data.setBuffer(nullptr);
         tables_to_sync.erase(tables_to_sync.begin());
     }
 
@@ -616,7 +650,6 @@ void MaterializedPostgreSQLConsumer::syncTables()
     updateLsn();
 }
 
-
 void MaterializedPostgreSQLConsumer::updateLsn()
 {
     try
@@ -632,7 +665,6 @@ void MaterializedPostgreSQLConsumer::updateLsn()
     }
 }
 
-
 String MaterializedPostgreSQLConsumer::advanceLSN(std::shared_ptr<pqxx::nontransaction> tx)
 {
     std::string query_str = fmt::format("SELECT end_lsn FROM pg_replication_slot_advance('{}', '{}')", replication_slot_name, final_lsn);
@@ -644,7 +676,6 @@ String MaterializedPostgreSQLConsumer::advanceLSN(std::shared_ptr<pqxx::nontrans
     return final_lsn;
 }
 
-
 /// Sync for some table might not be allowed if:
 /// 1. Table schema changed and might break synchronization.
 /// 2. There is no storage for this table. (As a result of some exception or incorrect pg_publication)
@@ -700,7 +731,6 @@ bool MaterializedPostgreSQLConsumer::isSyncAllowed(Int32 relation_id, const Stri
     return false;
 }
 
-
 void MaterializedPostgreSQLConsumer::markTableAsSkipped(Int32 relation_id, const String & relation_name)
 {
     skip_list.insert({relation_id, ""}); /// Empty lsn string means - continue waiting for valid lsn.
@@ -712,12 +742,11 @@ void MaterializedPostgreSQLConsumer::markTableAsSkipped(Int32 relation_id, const
         relation_name, relation_id);
 }
 
-
 void MaterializedPostgreSQLConsumer::addNested(
     const String & postgres_table_name, StorageInfo nested_storage_info, const String & table_start_lsn)
 {
     assert(!storages.contains(postgres_table_name));
-    storages.emplace(postgres_table_name, nested_storage_info);
+    storages.emplace(postgres_table_name, StorageData(nested_storage_info, log));
 
     auto it = deleted_tables.find(postgres_table_name);
     if (it != deleted_tables.end())
@@ -728,17 +757,15 @@ void MaterializedPostgreSQLConsumer::addNested(
     waiting_list[postgres_table_name] = table_start_lsn;
 }
 
-
 void MaterializedPostgreSQLConsumer::updateNested(const String & table_name, StorageInfo nested_storage_info, Int32 table_id, const String & table_start_lsn)
 {
     assert(!storages.contains(table_name));
-    storages.emplace(table_name, nested_storage_info);
+    storages.try_emplace(table_name, nested_storage_info, log); /// Construct StorageData in place: its const members would be copied, not moved, out of a temporary.
 
     /// Set start position to valid lsn. Before it was an empty string. Further read for table allowed, if it has a valid lsn.
     skip_list[table_id] = table_start_lsn;
 }
 
-
 void MaterializedPostgreSQLConsumer::removeNested(const String & postgres_table_name)
 {
     auto it = storages.find(postgres_table_name);
@@ -747,7 +774,6 @@ void MaterializedPostgreSQLConsumer::removeNested(const String & postgres_table_
     deleted_tables.insert(postgres_table_name);
 }
 
-
 void MaterializedPostgreSQLConsumer::setSetting(const SettingChange & setting)
 {
     if (setting.name == "materialized_postgresql_max_block_size")
@@ -756,7 +782,6 @@ void MaterializedPostgreSQLConsumer::setSetting(const SettingChange & setting)
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported setting: {}", setting.name);
 }
 
-
 /// Read binary changes from replication slot via COPY command (starting from current lsn in a slot).
 bool MaterializedPostgreSQLConsumer::consume()
 {
diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
index 02cbedb4dd5..d29236b8123 100644
--- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
+++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
@@ -32,32 +32,36 @@ class MaterializedPostgreSQLConsumer
 private:
     struct StorageData
     {
+        explicit StorageData(const StorageInfo & storage_info, Poco::Logger * log_);
+
+        size_t getColumnsNum() const { return table_description.sample_block.columns(); } /// Column count of the table's sample block.
+
+        const Block & getSampleBlock() const { return table_description.sample_block; } /// Sample block of the whole table (not of the current buffer).
+
+        using ArrayInfo = std::unordered_map<size_t, PostgreSQLArrayInfo>;
+
+        const StoragePtr storage;
+        const ExternalResultDescription table_description; /// Its sample_block is where per-column entries for a new Buffer are looked up by name.
+        const PostgreSQLTableStructure::Attributes columns_attributes; /// To validate ddl: atttypid / atttypmod are compared against incoming Relation messages.
+        const ArrayInfo array_info; /// Needed for insertPostgreSQLValue() method to parse array.
+
         struct Buffer
         {
-            ExternalResultDescription description;
+            Block sample_block; /// Header used by syncTables() to wrap `columns` into a Block.
             MutableColumns columns;
+            ASTExpressionList columns_ast; /// Needed to pass to insert query columns list in syncTables().
 
-            /// Needed to pass to insert query columns list in syncTables().
-            std::shared_ptr<ASTExpressionList> columns_ast;
-            /// Needed for insertPostgreSQLValue() method to parse array
-            std::unordered_map<size_t, PostgreSQLArrayInfo> array_info;
-            /// To validate ddl.
-            PostgreSQLTableStructure::Attributes attributes;
+            explicit Buffer(ColumnsWithTypeAndName && columns_, const ExternalResultDescription & table_description_);
 
-            Buffer(StorageMetadataPtr storage_metadata, const PostgreSQLTableStructure::Attributes & attributes_);
-
-            size_t getColumnsNum() const
-            {
-                const auto & sample_block = description.sample_block;
-                return sample_block.columns();
-            }
+            void assertInsertIsPossible(size_t col_idx) const;
         };
 
-        StoragePtr storage;
-        Buffer buffer;
+        Buffer & getBuffer(); /// NOTE(review): defined elsewhere; `buffer` can be null between syncs - confirm how that case is handled.
 
-        explicit StorageData(const StorageInfo & storage_info);
-        StorageData(const StorageData & other) = delete;
+        void setBuffer(std::unique_ptr<Buffer> buffer_) { buffer = std::move(buffer_); } /// Replaces the current buffer; passing nullptr drops it.
+
+    private:
+        std::unique_ptr<Buffer> buffer; /// Created on a Relation message, reset to nullptr after syncTables() flushes the table.
     };
 
     using Storages = std::unordered_map<String, StorageData>;
@@ -97,8 +101,8 @@ private:
 
     bool isSyncAllowed(Int32 relation_id, const String & relation_name);
 
-    static void insertDefaultValue(StorageData::Buffer & buffer, size_t column_idx);
-    void insertValue(StorageData::Buffer & buffer, const std::string & value, size_t column_idx);
+    static void insertDefaultValue(StorageData & storage_data, size_t column_idx);
+    void insertValue(StorageData & storage_data, const std::string & value, size_t column_idx);
 
     enum class PostgreSQLQuery
     {
@@ -107,7 +111,7 @@ private:
         DELETE
     };
 
-    void readTupleData(StorageData::Buffer & buffer, const char * message, size_t & pos, size_t size, PostgreSQLQuery type, bool old_value = false);
+    void readTupleData(StorageData & storage_data, const char * message, size_t & pos, size_t size, PostgreSQLQuery type, bool old_value = false);
 
     template<typename T>
     static T unhexN(const char * message, size_t pos, size_t n);
@@ -119,8 +123,6 @@ private:
 
     void markTableAsSkipped(Int32 relation_id, const String & relation_name);
 
-    static void assertCorrectInsertion(StorageData::Buffer & buffer, size_t column_idx);
-
     /// lsn - log sequence number, like wal offset (64 bit).
     static Int64 getLSNValue(const std::string & lsn)
     {
diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
index ee38dcb44d4..7a73bdf153b 100644
--- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
+++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
@@ -337,6 +337,7 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error)
             dropReplicationSlot(tx);
 
         initial_sync();
+        LOG_DEBUG(log, "Loaded {} tables", nested_storages.size());
     }
     /// Synchronization and initial load already took place - do not create any new tables, just fetch StoragePtr's
     /// and pass them to replication consumer.
@@ -414,16 +415,18 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection
     std::string query_str = fmt::format("SET TRANSACTION SNAPSHOT '{}'", snapshot_name);
     tx->exec(query_str);
 
+    auto table_structure = fetchTableStructure(*tx, table_name);
+    if (!table_structure->physical_columns)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "No table attributes");
+
+    auto table_attributes = table_structure->physical_columns->attributes;
+
     /// Load from snapshot, which will show table state before creation of replication slot.
     /// Already connected to needed database, no need to add it to query.
     auto quoted_name = doubleQuoteWithSchema(table_name);
     query_str = fmt::format("SELECT * FROM ONLY {}", quoted_name);
-    LOG_DEBUG(log, "Loading PostgreSQL table {}.{}", postgres_database, quoted_name);
 
-    auto table_structure = fetchTableStructure(*tx, table_name);
-    if (!table_structure->physical_columns)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "No table attributes");
-    auto table_attributes = table_structure->physical_columns->attributes;
+    LOG_DEBUG(log, "Loading PostgreSQL table {}.{}", postgres_database, quoted_name);
 
     auto table_override = tryGetTableOverride(current_database_name, table_name);
     materialized_storage->createNestedIfNeeded(std::move(table_structure), table_override ? table_override->as<ASTTableOverride>() : nullptr);
@@ -444,12 +447,16 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection
     assertBlocksHaveEqualStructure(input->getPort().getHeader(), block_io.pipeline.getHeader(), "postgresql replica load from snapshot");
     block_io.pipeline.complete(Pipe(std::move(input)));
 
+    /// TODO: make a test when we fail in the middle of inserting data from source.
+
     CompletedPipelineExecutor executor(block_io.pipeline);
     executor.execute();
 
     materialized_storage->set(nested_storage);
     auto nested_table_id = nested_storage->getStorageID();
-    LOG_DEBUG(log, "Loaded table {}.{} (uuid: {})", nested_table_id.database_name, nested_table_id.table_name, toString(nested_table_id.uuid));
+
+    LOG_DEBUG(log, "Loaded table {}.{} (uuid: {})",
+              nested_table_id.database_name, nested_table_id.table_name, toString(nested_table_id.uuid));
 
     return StorageInfo(nested_storage, std::move(table_attributes));
 }
diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
index d83722dba6c..c753a41be40 100644
--- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
+++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
@@ -25,6 +25,8 @@
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTIdentifier.h>
 #include <Parsers/ASTTablesInSelectQuery.h>
+#include <Parsers/ExpressionListParsers.h>
+#include <Parsers/formatAST.h>
 
 #include <Interpreters/applyTableOverride.h>
 #include <Interpreters/InterpreterDropQuery.h>
@@ -195,7 +197,8 @@ void StorageMaterializedPostgreSQL::createNestedIfNeeded(PostgreSQLTableStructur
         const auto ast_create = getCreateNestedTableQuery(std::move(table_structure), table_override);
         auto table_id = getStorageID();
         auto tmp_nested_table_id = StorageID(table_id.database_name, getNestedTableName());
-        LOG_DEBUG(log, "Creating clickhouse table for postgresql table {}", table_id.getNameForLogs());
+        LOG_DEBUG(log, "Creating clickhouse table for postgresql table {} (ast: {})",
+                  table_id.getNameForLogs(), serializeAST(*ast_create));
 
         InterpreterCreateQuery interpreter(ast_create, nested_context);
         interpreter.execute();
@@ -359,7 +362,8 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d
 }
 
 
-std::shared_ptr<ASTExpressionList> StorageMaterializedPostgreSQL::getColumnsExpressionList(const NamesAndTypesList & columns) const
+std::shared_ptr<ASTExpressionList>
+StorageMaterializedPostgreSQL::getColumnsExpressionList(const NamesAndTypesList & columns, std::unordered_map<std::string, ASTPtr> defaults) const
 {
     auto columns_expression_list = std::make_shared<ASTExpressionList>();
     for (const auto & [name, type] : columns)
@@ -369,6 +373,12 @@ std::shared_ptr<ASTExpressionList> StorageMaterializedPostgreSQL::getColumnsExpr
         column_declaration->name = name;
         column_declaration->type = getColumnDeclaration(type);
 
+        if (auto it = defaults.find(name); it != defaults.end())
+        {
+            column_declaration->default_expression = it->second;
+            column_declaration->default_specifier = "DEFAULT";
+        }
+
         columns_expression_list->children.emplace_back(column_declaration);
     }
     return columns_expression_list;
@@ -460,8 +470,28 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery(
         }
         else
         {
-            ordinary_columns_and_types = table_structure->physical_columns->columns;
-            columns_declare_list->set(columns_declare_list->columns, getColumnsExpressionList(ordinary_columns_and_types));
+            const auto columns = table_structure->physical_columns;
+            std::unordered_map<std::string, ASTPtr> defaults;
+            for (const auto & col : columns->columns)
+            {
+                const auto & attr = columns->attributes.at(col.name);
+                if (!attr.attr_def.empty())
+                {
+                    ParserExpression expr_parser;
+                    Expected expected;
+                    ASTPtr result;
+
+                    Tokens tokens(attr.attr_def.data(), attr.attr_def.data() + attr.attr_def.size());
+                    IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH);
+                    if (!expr_parser.parse(pos, result, expected))
+                    {
+                        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse default expression: {}", attr.attr_def);
+                    }
+                    defaults.emplace(col.name, result);
+                }
+            }
+            ordinary_columns_and_types = columns->columns;
+            columns_declare_list->set(columns_declare_list->columns, getColumnsExpressionList(ordinary_columns_and_types, defaults));
         }
 
         if (ordinary_columns_and_types.empty())
diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h
index af0adb10f9f..9c3c195e34f 100644
--- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h
+++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h
@@ -109,7 +109,8 @@ public:
 
     ASTPtr getCreateNestedTableQuery(PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override);
 
-    std::shared_ptr<ASTExpressionList> getColumnsExpressionList(const NamesAndTypesList & columns) const;
+    std::shared_ptr<ASTExpressionList> getColumnsExpressionList(
+        const NamesAndTypesList & columns, std::unordered_map<std::string, ASTPtr> defaults = {}) const;
 
     StoragePtr getNested() const;
 
diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py
index e8053730c44..2a72c3591e9 100644
--- a/tests/integration/test_postgresql_replica_database_engine_2/test.py
+++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py
@@ -810,6 +810,150 @@ def test_replica_consumer(started_cluster):
     pg_manager_instance2.clear()
 
 
+def test_replica_consumer(started_cluster):
+    table = "test_replica_consumer"
+    pg_manager_instance2.restart()
+
+    pg_manager.create_postgres_table(table)
+    instance.query(
+        f"INSERT INTO postgres_database.{table} SELECT number, number from numbers(0, 50)"
+    )
+
+    for pm in [pg_manager, pg_manager_instance2]:
+        pm.create_materialized_db(
+            ip=started_cluster.postgres_ip,
+            port=started_cluster.postgres_port,
+            settings=[
+                f"materialized_postgresql_tables_list = '{table}'",
+                "materialized_postgresql_backoff_min_ms = 100",
+                "materialized_postgresql_backoff_max_ms = 100",
+                "materialized_postgresql_use_unique_replication_consumer_identifier = 1",
+            ],
+        )
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+    check_tables_are_synchronized(
+        instance2, table, postgres_database=pg_manager_instance2.get_default_database()
+    )
+
+    assert 50 == int(instance.query(f"SELECT count() FROM test_database.{table}"))
+    assert 50 == int(instance2.query(f"SELECT count() FROM test_database.{table}"))
+
+    instance.query(
+        f"INSERT INTO postgres_database.{table} SELECT number, number from numbers(1000, 1000)"
+    )
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+    check_tables_are_synchronized(
+        instance2, table, postgres_database=pg_manager_instance2.get_default_database()
+    )
+
+    assert 1050 == int(instance.query(f"SELECT count() FROM test_database.{table}"))
+    assert 1050 == int(instance2.query(f"SELECT count() FROM test_database.{table}"))
+
+    for pm in [pg_manager, pg_manager_instance2]:
+        pm.drop_materialized_db()
+    pg_manager_instance2.clear()
+
+
+def test_generated_columns(started_cluster):  # Replication of a table that has a GENERATED ALWAYS ... STORED column.
+    table = "test_generated_columns"
+
+    pg_manager.create_postgres_table(
+        table,
+        "",
+        f"""CREATE TABLE {table} (
+             key integer PRIMARY KEY,
+             x integer,
+             y integer GENERATED ALWAYS AS (x*2) STORED,
+             z text);
+         """,
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z) values (1,1,'1');")  # y is never supplied by the test
+    pg_manager.execute(f"insert into {table} (key, x, z) values (2,2,'2');")
+
+    pg_manager.create_materialized_db(  # rows 1-2 already exist when the database is created
+        ip=started_cluster.postgres_ip,
+        port=started_cluster.postgres_port,
+        settings=[
+            f"materialized_postgresql_tables_list = '{table}'",
+            "materialized_postgresql_backoff_min_ms = 100",
+            "materialized_postgresql_backoff_max_ms = 100",
+        ],
+    )
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z) values (3,3,'3');")  # inserted after the database exists
+    pg_manager.execute(f"insert into {table} (key, x, z) values (4,4,'4');")
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z) values (5,5,'5');")  # a second batch after one successful sync
+    pg_manager.execute(f"insert into {table} (key, x, z) values (6,6,'6');")
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+
+def test_default_columns(started_cluster):  # Replication of a table whose text columns y and a have DEFAULT values.
+    table = "test_default_columns"
+
+    pg_manager.create_postgres_table(
+        table,
+        "",
+        f"""CREATE TABLE {table} (
+             key integer PRIMARY KEY,
+             x integer,
+             y text DEFAULT 'y1',
+             z integer,
+             a text DEFAULT 'a1',
+             b integer);
+         """,
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (1,1,1,1);")  # y and a are omitted in every insert
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (2,2,2,2);")
+
+    pg_manager.create_materialized_db(  # rows 1-2 already exist when the database is created
+        ip=started_cluster.postgres_ip,
+        port=started_cluster.postgres_port,
+        settings=[
+            f"materialized_postgresql_tables_list = '{table}'",
+            "materialized_postgresql_backoff_min_ms = 100",
+            "materialized_postgresql_backoff_max_ms = 100",
+        ],
+    )
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (3,3,3,3);")  # inserted after the database exists
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (4,4,4,4);")
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (5,5,5,5);")  # a second batch after one successful sync
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (6,6,6,6);")
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+
 if __name__ == "__main__":
     cluster.start()
     input("Cluster created, press any key to destroy...")

From 9d8e3f8bd913aa4e7160ebc7c8045a41550cd4fe Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Wed, 6 Dec 2023 16:47:58 +0100
Subject: [PATCH 065/213] Update fetchPostgreSQLTableStructure.cpp

---
 src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
index 527936f1c19..a3ae864db85 100644
--- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
+++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
@@ -25,6 +25,7 @@ namespace ErrorCodes
 {
     extern const int UNKNOWN_TABLE;
     extern const int BAD_ARGUMENTS;
+    extern const int LOGICAL_ERROR;
 }
 
 

From b57340bde214855c9e04f77483bdf571d122d822 Mon Sep 17 00:00:00 2001
From: Ryan Jacobs <ryan.jacobs@tophap.com>
Date: Wed, 6 Dec 2023 11:54:45 -0800
Subject: [PATCH 066/213] postgresql integration: Throw errors instead of
 assuming array_ndim == 1

---
 .../fetchPostgreSQLTableStructure.cpp         | 60 ++++++++++---------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
index 0a35bc8c2b5..6c25514418e 100644
--- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
+++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
@@ -222,40 +222,42 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList(
         {
             const auto & name_and_type = columns[i];
 
-            /// NOTE: If the relation is empty, then array_ndims returns NULL.
-            /// If this is the case, then assume dimensions=1. This covers most
-            /// use cases, but will be incorrect for empty tables with
-            /// multi-dimension arrays. The other solutions would be to drop
-            /// support for empty tables OR attempt fallback to a discovered
-            /// array_ndims CHECK constraint.
-            int dimensions;
+            /// If the relation is empty, there is no row to evaluate array_ndims on,
+            /// so the number of dimensions cannot be inferred. ClickHouse cannot support this use case.
             if (isTableEmpty(tx, postgres_table))
-            {
-                dimensions = 1;
-            }
-            else
-            {
-                /// All rows must contain the same number of dimensions.
-                /// 1 is ok. If number of dimensions in all rows is not the same -
-                /// such arrays are not able to be used as ClickHouse Array at all.
-                ///
-                /// Assume dimensions=1 for empty arrays.
-                auto postgres_column = doubleQuoteString(name_and_type.name);
-                pqxx::result result{tx.exec(fmt::format(
-                    "SELECT {} IS NULL, COALESCE(array_ndims({}), 1) "
-                    "FROM {} LIMIT 1;",
-                    postgres_column,
-                    postgres_column,
-                    postgres_table))};
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL relation containing arrays cannot be empty: {}", postgres_table);
 
-                /// Nullable(Array) is not supported.
-                auto is_null = result[0][0].as<bool>();
-                if (is_null)
-                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL array cannot be NULL. Column: {}", postgres_column);
+            /// All rows must contain the same number of dimensions.
+            /// 1 is ok. If number of dimensions in all rows is not the same -
+            /// such arrays are not able to be used as ClickHouse Array at all.
+            ///
+            /// For empty arrays, array_ndims([]) will return NULL.
+            auto postgres_column = doubleQuoteString(name_and_type.name);
+            pqxx::result result{tx.exec(fmt::format(
+                "SELECT {} IS NULL, array_ndims({}) "
+                "FROM {} LIMIT 1;",
+                postgres_column,
+                postgres_column,
+                postgres_table))};
 
-                dimensions = result[0][1].as<int>();
+            /// Nullable(Array) is not supported.
+            auto is_null_array = result[0][0].as<bool>();
+            if (is_null_array)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL array cannot be NULL: {}.{}", postgres_table, postgres_column);
+
+            /// Cannot infer dimension of empty arrays.
+            auto is_empty_array = result[0][1].is_null();
+            if (is_empty_array)
+            {
+                throw Exception(
+                    ErrorCodes::BAD_ARGUMENTS,
+                    "PostgreSQL cannot infer dimensions of an empty array: {}.{}",
+                    postgres_table,
+                    postgres_column);
             }
 
+            int dimensions = result[0][1].as<int>();
+
             /// It is always 1d array if it is in recheck.
             DataTypePtr type = assert_cast<const DataTypeArray *>(name_and_type.type.get())->getNestedType();
             while (dimensions--)

From 119c2864a07e7ef83a68add87020bbfad869a237 Mon Sep 17 00:00:00 2001
From: Ryan Jacobs <ryan.jacobs@tophap.com>
Date: Wed, 6 Dec 2023 12:59:28 -0800
Subject: [PATCH 067/213] test_storage_postgresql: mixed-case identifier on
 array column

---
 .../test_storage_postgresql/test.py           | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py
index 11729a5ab18..39896c57b59 100644
--- a/tests/integration/test_storage_postgresql/test.py
+++ b/tests/integration/test_storage_postgresql/test.py
@@ -90,20 +90,20 @@ def test_postgres_conversions(started_cluster):
     cursor.execute(
         """CREATE TABLE test_types (
         a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial,
-        h timestamp, i date, j decimal(5, 3), k numeric, l boolean)"""
+        h timestamp, i date, j decimal(5, 3), k numeric, l boolean, "M" integer)"""
     )
     node1.query(
         """
         INSERT INTO TABLE FUNCTION postgresql('postgres1:5432', 'postgres', 'test_types', 'postgres', 'mysecretpassword') VALUES
-        (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12.012345', '2000-05-12', 22.222, 22.222, 1)"""
+        (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12.012345', '2000-05-12', 22.222, 22.222, 1, 42)"""
     )
     result = node1.query(
         """
-        SELECT a, b, c, d, e, f, g, h, i, j, toDecimal128(k, 3), l FROM postgresql('postgres1:5432', 'postgres', 'test_types', 'postgres', 'mysecretpassword')"""
+        SELECT a, b, c, d, e, f, g, h, i, j, toDecimal128(k, 3), l, "M" FROM postgresql('postgres1:5432', 'postgres', 'test_types', 'postgres', 'mysecretpassword')"""
     )
     assert (
         result
-        == "-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12.012345\t2000-05-12\t22.222\t22.222\t1\n"
+        == "-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12.012345\t2000-05-12\t22.222\t22.222\t1\t42\n"
     )
 
     cursor.execute(
@@ -132,7 +132,8 @@ def test_postgres_conversions(started_cluster):
                 i Char(2)[][][][],                          -- Nullable(String)
                 j Char(2)[],                                -- Nullable(String)
                 k UUID[],                                   -- Nullable(UUID)
-                l UUID[][]                                  -- Nullable(UUID)
+                l UUID[][],                                 -- Nullable(UUID)
+                "M" integer[] NOT NULL                      -- Int32 (mixed-case identifier)
            )"""
     )
 
@@ -152,7 +153,8 @@ def test_postgres_conversions(started_cluster):
         "i\tArray(Array(Array(Array(Nullable(String)))))\t\t\t\t\t\n"
         "j\tArray(Nullable(String))\t\t\t\t\t\n"
         "k\tArray(Nullable(UUID))\t\t\t\t\t\n"
-        "l\tArray(Array(Nullable(UUID)))"
+        "l\tArray(Array(Nullable(UUID)))\t\t\t\t\t\n"
+        "M\tArray(Int32)"
         ""
     )
     assert result.rstrip() == expected
@@ -171,7 +173,8 @@ def test_postgres_conversions(started_cluster):
         "[[[[NULL]]]], "
         "[], "
         "['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a', '42209d53-d641-4d73-a8b6-c038db1e75d6', NULL], "
-        "[[NULL, '42209d53-d641-4d73-a8b6-c038db1e75d6'], ['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a', NULL], [NULL, NULL]]"
+        "[[NULL, '42209d53-d641-4d73-a8b6-c038db1e75d6'], ['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a', NULL], [NULL, NULL]],"
+        "[42, 42, 42]"
         ")"
     )
 
@@ -191,7 +194,8 @@ def test_postgres_conversions(started_cluster):
         "[[[[NULL]]]]\t"
         "[]\t"
         "['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a','42209d53-d641-4d73-a8b6-c038db1e75d6',NULL]\t"
-        "[[NULL,'42209d53-d641-4d73-a8b6-c038db1e75d6'],['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a',NULL],[NULL,NULL]]\n"
+        "[[NULL,'42209d53-d641-4d73-a8b6-c038db1e75d6'],['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a',NULL],[NULL,NULL]]\t"
+        "[42,42,42]\n"
     )
     assert result == expected
 

From b44dadc5c6cd7671e39cee69d139730a1c3fea62 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev" <felixoid@clickhouse.com>
Date: Thu, 7 Dec 2023 00:45:57 +0100
Subject: [PATCH 068/213] Identify failed jobs in lambda and mark as steps=0

---
 tests/ci/workflow_jobs_lambda/app.py | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/tests/ci/workflow_jobs_lambda/app.py b/tests/ci/workflow_jobs_lambda/app.py
index c624a492604..6931835f601 100644
--- a/tests/ci/workflow_jobs_lambda/app.py
+++ b/tests/ci/workflow_jobs_lambda/app.py
@@ -8,11 +8,11 @@ Then it either posts it as is to the play.clickhouse.com, or anonymizes the sens
 fields for private repositories
 """
 
+import json
+import logging
 from base64 import b64decode
 from dataclasses import dataclass
 from typing import Any, List, Optional
-import json
-import logging
 
 from lambda_shared import ClickHouseHelper, InsertException, get_parameter_from_ssm
 
@@ -126,6 +126,20 @@ def send_event_workflow_job(workflow_job: WorkflowJob) -> None:
         )
 
 
+def killed_job(wf_job: dict) -> bool:
+    """A hack to identify a killed runner by a missing or unfinished "Complete job" step"""
+    if (
+        wf_job.get("status", "") != "completed"
+        or wf_job.get("conclusion", "") != "failure"
+    ):
+        # The job either did not fail or is still in progress
+        return False
+    return not any(
+        step["name"] == "Complete job" and step["conclusion"] is not None
+        for step in wf_job["steps"]
+    )
+
+
 def handler(event: dict, context: Any) -> dict:
     if event["isBase64Encoded"]:
         event_data = json.loads(b64decode(event["body"]))
@@ -141,8 +155,14 @@ def handler(event: dict, context: Any) -> dict:
         logging.error("The event data: %s", event)
         logging.error("The context data: %s", context)
 
-    # We record only finished steps
-    steps = len([step for step in wf_job["steps"] if step["conclusion"] is not None])
+    if killed_job(wf_job):
+        # For a killed job we record 0 finished steps
+        steps = 0
+    else:
+        # We record only finished steps
+        steps = len(
+            [step for step in wf_job["steps"] if step["conclusion"] is not None]
+        )
 
     workflow_job = WorkflowJob(
         wf_job["id"],

From 7d4142693513b0cccfedc0d1398e849e53f36107 Mon Sep 17 00:00:00 2001
From: Ryan Jacobs <ryan.jacobs@tophap.com>
Date: Wed, 6 Dec 2023 18:25:45 -0800
Subject: [PATCH 069/213] test_storage_postgresql: Add
 test_postgres_array_ndim_error_messges()

Tests:

1. View with an array column must not be empty (no rows) --> error message
2. View must not contain an empty array --> error message
3. View must not contain a NULL array value --> error message
4. Ensures PG identifiers that require quoting do not crash ClickHouse.

These apply to views that contain arrays.
---
 .../test_storage_postgresql/test.py           | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py
index 39896c57b59..8e1be600687 100644
--- a/tests/integration/test_storage_postgresql/test.py
+++ b/tests/integration/test_storage_postgresql/test.py
@@ -202,6 +202,53 @@ def test_postgres_conversions(started_cluster):
     cursor.execute(f"DROP TABLE test_types")
     cursor.execute(f"DROP TABLE test_array_dimensions")
 
+def test_postgres_array_ndim_error_messges(started_cluster):
+    cursor = started_cluster.postgres_conn.cursor()
+
+    # cleanup
+    cursor.execute('DROP VIEW  IF EXISTS array_ndim_view;')
+    cursor.execute('DROP TABLE IF EXISTS array_ndim_table;')
+
+    # setup
+    cursor.execute('CREATE TABLE array_ndim_table (x INTEGER, "Mixed-case with spaces" INTEGER[]);')
+    cursor.execute('CREATE VIEW  array_ndim_view AS SELECT * FROM array_ndim_table;')
+    describe_table = """
+    DESCRIBE TABLE postgresql(
+        'postgres1:5432', 'postgres', 'array_ndim_view',
+        'postgres', 'mysecretpassword'
+    )
+    """
+
+    # View with array column cannot be empty. Should throw a useful error message.
+    # (Cannot infer array dimension.)
+    try:
+        node1.query(describe_table)
+        assert False
+    except Exception as error:
+        assert ('PostgreSQL relation containing arrays cannot be empty: array_ndim_view' in str(error))
+
+    # View cannot have an empty array. Should throw a useful error message.
+    # (Cannot infer array dimension.)
+    cursor.execute('TRUNCATE array_ndim_table;')
+    cursor.execute("INSERT INTO array_ndim_table VALUES (1234, '{}');")
+    try:
+        node1.query(describe_table)
+        assert False
+    except Exception as error:
+        assert ('PostgreSQL cannot infer dimensions of an empty array: array_ndim_view."Mixed-case with spaces"' in str(error))
+
+    # View cannot have a NULL array value. Should throw a useful error message.
+    cursor.execute('TRUNCATE array_ndim_table;')
+    cursor.execute('INSERT INTO array_ndim_table VALUES (1234, NULL);')
+    try:
+        node1.query(describe_table)
+        assert False
+    except Exception as error:
+        assert ('PostgreSQL array cannot be NULL: array_ndim_view."Mixed-case with spaces"' in str(error))
+
+    # cleanup
+    cursor.execute('DROP VIEW  IF EXISTS array_ndim_view;')
+    cursor.execute('DROP TABLE IF EXISTS array_ndim_table;')
 
 def test_non_default_schema(started_cluster):
     node1.query("DROP TABLE IF EXISTS test_pg_table_schema")

From 6a698d94804c2fd165c29ea168ec64383e333d3e Mon Sep 17 00:00:00 2001
From: Ryan Jacobs <ryan.jacobs@tophap.com>
Date: Wed, 6 Dec 2023 18:56:54 -0800
Subject: [PATCH 070/213] clang-tidy

---
 .../PostgreSQL/fetchPostgreSQLTableStructure.cpp          | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
index 6c25514418e..d4f9bb6dcf4 100644
--- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
+++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
@@ -233,12 +233,8 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList(
             ///
             /// For empty arrays, array_ndims([]) will return NULL.
             auto postgres_column = doubleQuoteString(name_and_type.name);
-            pqxx::result result{tx.exec(fmt::format(
-                "SELECT {} IS NULL, array_ndims({}) "
-                "FROM {} LIMIT 1;",
-                postgres_column,
-                postgres_column,
-                postgres_table))};
+            pqxx::result result{tx.exec(
+                fmt::format("SELECT {} IS NULL, array_ndims({}) FROM {} LIMIT 1;", postgres_column, postgres_column, postgres_table))};
 
             /// Nullable(Array) is not supported.
             auto is_null_array = result[0][0].as<bool>();

From d0675488acf65d2391c9dbcff8f58ade93c73384 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Tue, 5 Dec 2023 16:16:11 +0800
Subject: [PATCH 071/213]  rebase

---
 src/Databases/DatabaseLazy.cpp                |    3 +-
 src/Databases/IDatabase.cpp                   |    3 +-
 src/Interpreters/Context.cpp                  | 1042 +++++++----------
 .../0_stateless/02931_max_num_to_warn.sql     |   58 +-
 4 files changed, 432 insertions(+), 674 deletions(-)

diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp
index caf14aa9b15..c6249c68933 100644
--- a/src/Databases/DatabaseLazy.cpp
+++ b/src/Databases/DatabaseLazy.cpp
@@ -19,7 +19,8 @@
 namespace fs = std::filesystem;
 
 
-namespace CurrentMetrics {
+namespace CurrentMetrics
+{
     extern const Metric AttachedTable;
 }
 
diff --git a/src/Databases/IDatabase.cpp b/src/Databases/IDatabase.cpp
index b4fd5ea4612..95fcf0c7939 100644
--- a/src/Databases/IDatabase.cpp
+++ b/src/Databases/IDatabase.cpp
@@ -35,7 +35,8 @@ StoragePtr IDatabase::getTable(const String & name, ContextPtr context) const
         throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} does not exist. Maybe you meant {}?", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name), backQuoteIfNeed(names[0]));
 }
 
-IDatabase::IDatabase(String database_name_) : database_name(std::move(database_name_)) {
+IDatabase::IDatabase(String database_name_) : database_name(std::move(database_name_))
+{
     CurrentMetrics::add(CurrentMetrics::AttachedDatabase, 1);
 }
 
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index dd1f5c76370..1e732083c9d 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1,5 +1,6 @@
-#include <filesystem>
 #include <map>
+#include <set>
+#include <optional>
 #include <memory>
 #include <Poco/UUID.h>
 #include <Poco/Net/NameValueCollection.h>
@@ -51,119 +52,74 @@
 #include <Access/ContextAccess.h>
 #include <Access/EnabledRolesInfo.h>
 #include <Access/EnabledRowPolicies.h>
-#include <Access/ExternalAuthenticators.h>
-#include <Access/GSSAcceptor.h>
 #include <Access/QuotaUsage.h>
-#include <Access/SettingsConstraintsAndProfileIDs.h>
+#include <Access/User.h>
 #include <Access/SettingsProfile.h>
 #include <Access/SettingsProfilesInfo.h>
-#include <Access/User.h>
+#include <Access/SettingsConstraintsAndProfileIDs.h>
+#include <Access/ExternalAuthenticators.h>
+#include <Access/GSSAcceptor.h>
+#include <IO/ResourceManagerFactory.h>
 #include <Backups/BackupsWorker.h>
-#include <Coordination/KeeperDispatcher.h>
-#include <Core/BackgroundSchedulePool.h>
-#include <Core/ServerSettings.h>
-#include <Core/Settings.h>
-#include <Core/SettingsQuirks.h>
-#include <Databases/IDatabase.h>
 #include <Dictionaries/Embedded/GeoDictionariesLoader.h>
-#include <Disks/DiskLocal.h>
-#include <Disks/ObjectStorages/DiskObjectStorage.h>
-#include <Disks/ObjectStorages/IObjectStorage.h>
-#include <Disks/StoragePolicy.h>
-#include <Formats/FormatFactory.h>
+#include <Interpreters/EmbeddedDictionaries.h>
+#include <Interpreters/ExternalDictionariesLoader.h>
 #include <Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.h>
 #include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
 #include <Functions/UserDefined/createUserDefinedSQLObjectsLoader.h>
-#include <IO/MMappedFileCache.h>
-#include <IO/ReadBufferFromFile.h>
-#include <IO/ResourceManagerFactory.h>
-#include <IO/SynchronousReader.h>
-#include <IO/UncompressedCache.h>
-#include <IO/WriteSettings.h>
-#include <Interpreters/ActionLocksManager.h>
-#include <Interpreters/AsynchronousInsertQueue.h>
-#include <Interpreters/Cache/FileCacheFactory.h>
-#include <Interpreters/Cache/QueryCache.h>
-#include <Interpreters/Cluster.h>
-#include <Interpreters/ClusterDiscovery.h>
-#include <Interpreters/Context.h>
-#include <Interpreters/DDLTask.h>
-#include <Interpreters/DDLWorker.h>
-#include <Interpreters/DatabaseCatalog.h>
-#include <Interpreters/EmbeddedDictionaries.h>
-#include <Interpreters/ExternalDictionariesLoader.h>
-#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
-#include <Interpreters/InterpreterSelectWithUnionQuery.h>
-#include <Interpreters/InterserverCredentials.h>
-#include <Interpreters/InterserverIOHandler.h>
-#include <Interpreters/JIT/CompiledExpressionCache.h>
-#include <Interpreters/Lemmatizers.h>
-#include <Interpreters/PreparedSets.h>
 #include <Interpreters/ProcessList.h>
+#include <Interpreters/InterserverCredentials.h>
+#include <Interpreters/Cluster.h>
+#include <Interpreters/InterserverIOHandler.h>
+#include <Interpreters/Context.h>
+#include <Interpreters/DDLWorker.h>
+#include <Interpreters/DDLTask.h>
 #include <Interpreters/Session.h>
-#include <Interpreters/SessionTracker.h>
-#include <Interpreters/SynonymsExtensions.h>
-#include <Interpreters/TemporaryDataOnDisk.h>
 #include <Interpreters/TraceCollector.h>
-#include <Interpreters/TransactionLog.h>
-#include <Parsers/ASTAsterisk.h>
+#include <IO/ReadBufferFromFile.h>
+#include <IO/UncompressedCache.h>
+#include <IO/MMappedFileCache.h>
+#include <IO/WriteSettings.h>
 #include <Parsers/ASTCreateQuery.h>
-#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTAsterisk.h>
 #include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTSelectWithUnionQuery.h>
-#include <Parsers/FunctionParameterValuesVisitor.h>
-#include <Server/ServerType.h>
-#include <Storages/CompressionCodecSelector.h>
-#include <Storages/MarkCache.h>
-#include <Storages/MergeTree/BackgroundJobsAssignee.h>
-#include <Storages/MergeTree/MergeList.h>
-#include <Storages/MergeTree/MergeTreeData.h>
-#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
-#include <Storages/MergeTree/MergeTreeSettings.h>
-#include <Storages/MergeTree/MovesList.h>
-#include <Storages/MergeTree/ReplicatedFetchList.h>
-#include <Storages/StorageS3Settings.h>
-#include <Storages/StorageView.h>
-#include <TableFunctions/TableFunctionFactory.h>
-#include <re2/re2.h>
-#include <Poco/Net/NameValueCollection.h>
-#include <Poco/UUID.h>
-#include <Poco/Util/Application.h>
-#include <Common/Config/AbstractConfigurationComparison.h>
+#include <Common/StackTrace.h>
 #include <Common/Config/ConfigHelper.h>
 #include <Common/Config/ConfigProcessor.h>
-#include <Common/EventNotifier.h>
-#include <Common/FieldVisitorToString.h>
-#include <Common/HTTPHeaderFilter.h>
-#include <Common/Macros.h>
-#include <Common/RemoteHostFilter.h>
-#include <Common/SensitiveDataMasker.h>
-#include <Common/SharedLockGuard.h>
-#include <Common/ShellCommand.h>
-#include <Common/StackTrace.h>
-#include <Common/Stopwatch.h>
-#include <Common/Throttler.h>
+#include <Common/Config/AbstractConfigurationComparison.h>
 #include <Common/ZooKeeper/ZooKeeper.h>
-#include <Common/callOnce.h>
-#include <Common/formatReadable.h>
-#include <Common/getMultipleKeysFromConfig.h>
+#include <Common/ShellCommand.h>
 #include <Common/logger_useful.h>
-#include <Common/thread_local_rng.h>
+#include <Common/RemoteHostFilter.h>
+#include <Common/HTTPHeaderFilter.h>
+#include <Interpreters/AsynchronousInsertQueue.h>
+#include <Interpreters/DatabaseCatalog.h>
+#include <Interpreters/JIT/CompiledExpressionCache.h>
+#include <Storages/MergeTree/BackgroundJobsAssignee.h>
+#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
+#include <Interpreters/SynonymsExtensions.h>
+#include <Interpreters/Lemmatizers.h>
+#include <Interpreters/ClusterDiscovery.h>
+#include <Interpreters/TransactionLog.h>
+#include <filesystem>
+#include <re2/re2.h>
+#include <Storages/StorageView.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/FunctionParameterValuesVisitor.h>
+#include <Parsers/ASTSelectWithUnionQuery.h>
+#include <Interpreters/InterpreterSelectWithUnionQuery.h>
 
 
 namespace fs = std::filesystem;
 
 namespace ProfileEvents
 {
-extern const Event ContextLock;
-extern const Event ContextLockWaitMicroseconds;
+    extern const Event ContextLock;
+    extern const Event ContextLockWaitMicroseconds;
 }
 
 namespace CurrentMetrics
 {
-    extern const Metric AttachedTable;
-    extern const Metric AttachedDatabase;
-    extern const Metric PartsActive;
     extern const Metric ContextLockWait;
     extern const Metric BackgroundMovePoolTask;
     extern const Metric BackgroundMovePoolSize;
@@ -196,6 +152,9 @@ namespace CurrentMetrics
     extern const Metric TablesLoaderForegroundThreadsActive;
     extern const Metric TablesLoaderForegroundThreadsScheduled;
     extern const Metric IOWriterThreadsScheduled;
+    extern const Metric AttachedTable;
+    extern const Metric AttachedDatabase;
+    extern const Metric PartsActive;
 }
 
 
@@ -204,33 +163,32 @@ namespace DB
 
 namespace ErrorCodes
 {
-extern const int BAD_ARGUMENTS;
-extern const int UNKNOWN_DATABASE;
-extern const int UNKNOWN_TABLE;
-extern const int TABLE_ALREADY_EXISTS;
-extern const int THERE_IS_NO_SESSION;
-extern const int THERE_IS_NO_QUERY;
-extern const int NO_ELEMENTS_IN_CONFIG;
-extern const int TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT;
-extern const int LOGICAL_ERROR;
-extern const int INVALID_SETTING_VALUE;
-extern const int UNKNOWN_READ_METHOD;
-extern const int NOT_IMPLEMENTED;
-extern const int UNKNOWN_FUNCTION;
-extern const int ILLEGAL_COLUMN;
-extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
-extern const int CLUSTER_DOESNT_EXIST;
+    extern const int BAD_ARGUMENTS;
+    extern const int UNKNOWN_DATABASE;
+    extern const int UNKNOWN_TABLE;
+    extern const int TABLE_ALREADY_EXISTS;
+    extern const int THERE_IS_NO_SESSION;
+    extern const int THERE_IS_NO_QUERY;
+    extern const int NO_ELEMENTS_IN_CONFIG;
+    extern const int TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT;
+    extern const int LOGICAL_ERROR;
+    extern const int INVALID_SETTING_VALUE;
+    extern const int UNKNOWN_READ_METHOD;
+    extern const int NOT_IMPLEMENTED;
+    extern const int UNKNOWN_FUNCTION;
+    extern const int ILLEGAL_COLUMN;
+    extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
+    extern const int CLUSTER_DOESNT_EXIST;
 }
 
-#define SHUTDOWN(log, desc, ptr, method) \
-    do \
-    { \
-        if (ptr) \
-        { \
-            LOG_DEBUG(log, "Shutting down " desc); \
-            (ptr)->method; \
-        } \
-    } while (false)
+#define SHUTDOWN(log, desc, ptr, method) do             \
+{                                                       \
+    if (ptr)                                            \
+    {                                                   \
+        LOG_DEBUG(log, "Shutting down " desc);          \
+        (ptr)->method;                                  \
+    }                                                   \
+} while (false)                                         \
 
 /** Set of known objects (environment), that could be used in query.
   * Shared (global) part. Order of members (especially, order of destruction) is very important.
@@ -252,8 +210,8 @@ struct ContextSharedPart : boost::noncopyable
     /// Separate mutex for re-initialization of zookeeper session. This operation could take a long time and must not interfere with another operations.
     mutable std::mutex zookeeper_mutex;
 
-    mutable zkutil::ZooKeeperPtr zookeeper TSA_GUARDED_BY(zookeeper_mutex); /// Client for ZooKeeper.
-    ConfigurationPtr zookeeper_config TSA_GUARDED_BY(zookeeper_mutex); /// Stores zookeeper configs
+    mutable zkutil::ZooKeeperPtr zookeeper TSA_GUARDED_BY(zookeeper_mutex);                 /// Client for ZooKeeper.
+    ConfigurationPtr zookeeper_config TSA_GUARDED_BY(zookeeper_mutex);                      /// Stores zookeeper configs
 
     ConfigurationPtr sensitive_data_masker_config;
 
@@ -262,25 +220,23 @@ struct ContextSharedPart : boost::noncopyable
     mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
 #endif
     mutable std::mutex auxiliary_zookeepers_mutex;
-    mutable std::map<String, zkutil::ZooKeeperPtr>
-        auxiliary_zookeepers TSA_GUARDED_BY(auxiliary_zookeepers_mutex); /// Map for auxiliary ZooKeeper clients.
-    ConfigurationPtr auxiliary_zookeepers_config TSA_GUARDED_BY(auxiliary_zookeepers_mutex); /// Stores auxiliary zookeepers configs
+    mutable std::map<String, zkutil::ZooKeeperPtr> auxiliary_zookeepers TSA_GUARDED_BY(auxiliary_zookeepers_mutex);    /// Map for auxiliary ZooKeeper clients.
+    ConfigurationPtr auxiliary_zookeepers_config TSA_GUARDED_BY(auxiliary_zookeepers_mutex);           /// Stores auxiliary zookeepers configs
 
     /// No lock required for interserver_io_host, interserver_io_port, interserver_scheme modified only during initialization
-    String interserver_io_host; /// The host name by which this server is available for other servers.
-    UInt16 interserver_io_port = 0; /// and port.
-    String interserver_scheme; /// http or https
+    String interserver_io_host;                             /// The host name by which this server is available for other servers.
+    UInt16 interserver_io_port = 0;                         /// and port.
+    String interserver_scheme;                              /// http or https
     MultiVersion<InterserverCredentials> interserver_io_credentials;
 
-    String path TSA_GUARDED_BY(mutex); /// Path to the data directory, with a slash at the end.
-    String flags_path TSA_GUARDED_BY(mutex); /// Path to the directory with some control flags for server maintenance.
-    String user_files_path TSA_GUARDED_BY(mutex); /// Path to the directory with user provided files, usable by 'file' table function.
-    String dictionaries_lib_path
-        TSA_GUARDED_BY(mutex); /// Path to the directory with user provided binaries and libraries for external dictionaries.
-    String user_scripts_path TSA_GUARDED_BY(mutex); /// Path to the directory with user provided scripts.
-    String filesystem_caches_path TSA_GUARDED_BY(mutex); /// Path to the directory with filesystem caches.
-    ConfigurationPtr config TSA_GUARDED_BY(mutex); /// Global configuration settings.
-    String tmp_path TSA_GUARDED_BY(mutex); /// Path to the temporary files that occur when processing the request.
+    String path TSA_GUARDED_BY(mutex);                       /// Path to the data directory, with a slash at the end.
+    String flags_path TSA_GUARDED_BY(mutex);                 /// Path to the directory with some control flags for server maintenance.
+    String user_files_path TSA_GUARDED_BY(mutex);            /// Path to the directory with user provided files, usable by 'file' table function.
+    String dictionaries_lib_path TSA_GUARDED_BY(mutex);      /// Path to the directory with user provided binaries and libraries for external dictionaries.
+    String user_scripts_path TSA_GUARDED_BY(mutex);          /// Path to the directory with user provided scripts.
+    String filesystem_caches_path TSA_GUARDED_BY(mutex);     /// Path to the directory with filesystem caches.
+    ConfigurationPtr config TSA_GUARDED_BY(mutex);           /// Global configuration settings.
+    String tmp_path TSA_GUARDED_BY(mutex);                   /// Path to the temporary files that occur when processing the request.
 
     /// All temporary files that occur when processing the requests accounted here.
     /// Child scopes for more fine-grained accounting are created per user/query/etc.
@@ -296,11 +252,8 @@ struct ContextSharedPart : boost::noncopyable
     ExternalLoaderXMLConfigRepository * external_dictionaries_config_repository TSA_GUARDED_BY(external_dictionaries_mutex) = nullptr;
     scope_guard dictionaries_xmls TSA_GUARDED_BY(external_dictionaries_mutex);
 
-    mutable std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader>
-        external_user_defined_executable_functions_loader TSA_GUARDED_BY(external_user_defined_executable_functions_mutex);
-    ExternalLoaderXMLConfigRepository *
-        user_defined_executable_functions_config_repository TSA_GUARDED_BY(external_user_defined_executable_functions_mutex)
-        = nullptr;
+    mutable std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader> external_user_defined_executable_functions_loader TSA_GUARDED_BY(external_user_defined_executable_functions_mutex);
+    ExternalLoaderXMLConfigRepository * user_defined_executable_functions_config_repository TSA_GUARDED_BY(external_user_defined_executable_functions_mutex) = nullptr;
     scope_guard user_defined_executable_functions_xmls TSA_GUARDED_BY(external_user_defined_executable_functions_mutex);
 
     mutable OnceFlag user_defined_sql_objects_loader_initialized;
@@ -318,44 +271,39 @@ struct ContextSharedPart : boost::noncopyable
     std::optional<BackupsWorker> backups_worker;
 
     /// No lock required for default_profile_name, system_profile_name, buffer_profile_name modified only during initialization
-    String default_profile_name; /// Default profile name used for default values.
-    String system_profile_name; /// Profile used by system processes
-    String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying
+    String default_profile_name;                                /// Default profile name used for default values.
+    String system_profile_name;                                 /// Profile used by system processes
+    String buffer_profile_name;                                 /// Profile used by Buffer engine for flushing to the underlying table
     std::unique_ptr<AccessControl> access_control TSA_GUARDED_BY(mutex);
     mutable OnceFlag resource_manager_initialized;
     mutable ResourceManagerPtr resource_manager;
-    mutable UncompressedCachePtr uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks.
-    mutable MarkCachePtr mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files.
+    mutable UncompressedCachePtr uncompressed_cache TSA_GUARDED_BY(mutex);            /// The cache of decompressed blocks.
+    mutable MarkCachePtr mark_cache TSA_GUARDED_BY(mutex);                            /// Cache of marks in compressed files.
     mutable OnceFlag load_marks_threadpool_initialized;
-    mutable std::unique_ptr<ThreadPool> load_marks_threadpool; /// Threadpool for loading marks cache.
+    mutable std::unique_ptr<ThreadPool> load_marks_threadpool;  /// Threadpool for loading marks cache.
     mutable OnceFlag prefetch_threadpool_initialized;
-    mutable std::unique_ptr<ThreadPool> prefetch_threadpool; /// Threadpool for loading marks cache.
-    mutable UncompressedCachePtr index_uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks for MergeTree indices.
-    mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex); /// Cache of query results.
-    mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices.
-    mutable MMappedFileCachePtr mmap_cache
-        TSA_GUARDED_BY(mutex); /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads.
-    ProcessList process_list; /// Executing queries at the moment.
+    mutable std::unique_ptr<ThreadPool> prefetch_threadpool;    /// Threadpool for prefetching. NOTE(review): confirm what exactly is prefetched.
+    mutable UncompressedCachePtr index_uncompressed_cache TSA_GUARDED_BY(mutex);      /// The cache of decompressed blocks for MergeTree indices.
+    mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex);                          /// Cache of query results.
+    mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex);                      /// Cache of marks in compressed files of MergeTree indices.
+    mutable MMappedFileCachePtr mmap_cache TSA_GUARDED_BY(mutex);                     /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads.
+    ProcessList process_list;                                   /// Executing queries at the moment.
     SessionTracker session_tracker;
     GlobalOvercommitTracker global_overcommit_tracker;
-    MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree)
-    MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree)
+    MergeList merge_list;                                       /// The list of executable merge (for (Replicated)?MergeTree)
+    MovesList moves_list;                                       /// The list of executing moves (for (Replicated)?MergeTree)
     ReplicatedFetchList replicated_fetch_list;
-    ConfigurationPtr users_config TSA_GUARDED_BY(mutex); /// Config with the users, profiles and quotas sections.
-    InterserverIOHandler interserver_io_handler; /// Handler for interserver communication.
+    ConfigurationPtr users_config TSA_GUARDED_BY(mutex);                              /// Config with the users, profiles and quotas sections.
+    InterserverIOHandler interserver_io_handler;                /// Handler for interserver communication.
 
     OnceFlag buffer_flush_schedule_pool_initialized;
-    mutable std::unique_ptr<BackgroundSchedulePool>
-        buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables.
+    mutable std::unique_ptr<BackgroundSchedulePool> buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables.
     OnceFlag schedule_pool_initialized;
-    mutable std::unique_ptr<BackgroundSchedulePool>
-        schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables)
+    mutable std::unique_ptr<BackgroundSchedulePool> schedule_pool;    /// A thread pool that can run different jobs in background (used in replicated tables)
     OnceFlag distributed_schedule_pool_initialized;
-    mutable std::unique_ptr<BackgroundSchedulePool>
-        distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends)
+    mutable std::unique_ptr<BackgroundSchedulePool> distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends)
     OnceFlag message_broker_schedule_pool_initialized;
-    mutable std::unique_ptr<BackgroundSchedulePool>
-        message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka)
+    mutable std::unique_ptr<BackgroundSchedulePool> message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka)
 
     mutable OnceFlag readers_initialized;
     mutable std::unique_ptr<IAsynchronousReader> asynchronous_remote_fs_reader;
@@ -365,16 +313,16 @@ struct ContextSharedPart : boost::noncopyable
     mutable OnceFlag threadpool_writer_initialized;
     mutable std::unique_ptr<ThreadPool> threadpool_writer;
 
-    mutable ThrottlerPtr replicated_fetches_throttler; /// A server-wide throttler for replicated fetches
-    mutable ThrottlerPtr replicated_sends_throttler; /// A server-wide throttler for replicated sends
+    mutable ThrottlerPtr replicated_fetches_throttler;      /// A server-wide throttler for replicated fetches
+    mutable ThrottlerPtr replicated_sends_throttler;        /// A server-wide throttler for replicated sends
 
-    mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads
-    mutable ThrottlerPtr remote_write_throttler; /// A server-wide throttler for remote IO writes
+    mutable ThrottlerPtr remote_read_throttler;             /// A server-wide throttler for remote IO reads
+    mutable ThrottlerPtr remote_write_throttler;            /// A server-wide throttler for remote IO writes
 
-    mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads
-    mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes
+    mutable ThrottlerPtr local_read_throttler;              /// A server-wide throttler for local IO reads
+    mutable ThrottlerPtr local_write_throttler;             /// A server-wide throttler for local IO writes
 
-    mutable ThrottlerPtr backups_server_throttler; /// A server-wide throttler for BACKUPs
+    mutable ThrottlerPtr backups_server_throttler;          /// A server-wide throttler for BACKUPs
 
     MultiVersion<Macros> macros;                            /// Substitutions extracted from config.
     std::unique_ptr<DDLWorker> ddl_worker TSA_GUARDED_BY(mutex); /// Process ddl commands from zk.
@@ -388,24 +336,24 @@ struct ContextSharedPart : boost::noncopyable
 
     ServerSettings server_settings;
 
-    std::optional<MergeTreeSettings> merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of MergeTree* engines.
-    std::optional<MergeTreeSettings> replicated_merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of ReplicatedMergeTree* engines.
+    std::optional<MergeTreeSettings> merge_tree_settings TSA_GUARDED_BY(mutex);   /// Settings of MergeTree* engines.
+    std::optional<MergeTreeSettings> replicated_merge_tree_settings TSA_GUARDED_BY(mutex);   /// Settings of ReplicatedMergeTree* engines.
     std::atomic_size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default)
     std::unordered_set<String> get_client_http_header_forbidden_headers;
     bool allow_get_client_http_header;
     std::atomic_size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default)
+    /// No lock required for format_schema_path modified only during initialization
     std::atomic_size_t max_database_num_to_warn = 1000lu;
     std::atomic_size_t max_table_num_to_warn = 5000lu;
     std::atomic_size_t max_part_num_to_warn = 100000lu;
-    /// No lock required for format_schema_path modified only during initialization
-    String format_schema_path; /// Path to a directory that contains schema files used by input formats.
+    String format_schema_path;                              /// Path to a directory that contains schema files used by input formats.
     String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types.
     mutable OnceFlag action_locks_manager_initialized;
-    ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers
+    ActionLocksManagerPtr action_locks_manager;             /// Set of storages' action lockers
     OnceFlag system_logs_initialized;
-    std::unique_ptr<SystemLogs> system_logs TSA_GUARDED_BY(mutex); /// Used to log queries and operations on parts
-    std::optional<StorageS3Settings> storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage
-    std::vector<String> warnings TSA_GUARDED_BY(mutex); /// Store warning messages about server configuration.
+    std::unique_ptr<SystemLogs> system_logs TSA_GUARDED_BY(mutex);                /// Used to log queries and operations on parts
+    std::optional<StorageS3Settings> storage_s3_settings TSA_GUARDED_BY(mutex);   /// Settings of S3 storage
+    std::vector<String> warnings TSA_GUARDED_BY(mutex);                           /// Store warning messages about server configuration.
 
     /// Background executors for *MergeTree tables
     /// Has background executors for MergeTree tables been initialized?
@@ -416,17 +364,17 @@ struct ContextSharedPart : boost::noncopyable
     OrdinaryBackgroundExecutorPtr fetch_executor TSA_GUARDED_BY(background_executors_mutex);
     OrdinaryBackgroundExecutorPtr common_executor TSA_GUARDED_BY(background_executors_mutex);
 
-    RemoteHostFilter remote_host_filter TSA_GUARDED_BY(mutex); /// Allowed URL from config.xml
-    HTTPHeaderFilter http_header_filter TSA_GUARDED_BY(mutex); /// Forbidden HTTP headers from config.xml
+    RemoteHostFilter remote_host_filter TSA_GUARDED_BY(mutex);                    /// Allowed URL from config.xml
+    HTTPHeaderFilter http_header_filter TSA_GUARDED_BY(mutex);                    /// Forbidden HTTP headers from config.xml
 
     /// No lock required for trace_collector modified only during initialization
-    std::optional<TraceCollector> trace_collector; /// Thread collecting traces from threads executing queries
+    std::optional<TraceCollector> trace_collector;          /// Thread collecting traces from threads executing queries
 
     /// Clusters for distributed tables
     /// Initialized on demand (on distributed storages initialization) since Settings should be initialized
-    mutable std::mutex clusters_mutex; /// Guards clusters, clusters_config and cluster_discovery
+    mutable std::mutex clusters_mutex;                       /// Guards clusters, clusters_config and cluster_discovery
     std::shared_ptr<Clusters> clusters TSA_GUARDED_BY(clusters_mutex);
-    ConfigurationPtr clusters_config TSA_GUARDED_BY(clusters_mutex); /// Stores updated configs
+    ConfigurationPtr clusters_config TSA_GUARDED_BY(clusters_mutex);                        /// Stores updated configs
     std::unique_ptr<ClusterDiscovery> cluster_discovery TSA_GUARDED_BY(clusters_mutex);
 
     /// No lock required for async_insert_queue modified only during initialization
@@ -452,7 +400,9 @@ struct ContextSharedPart : boost::noncopyable
     bool is_server_completely_started TSA_GUARDED_BY(mutex) = false;
 
     ContextSharedPart()
-        : access_control(std::make_unique<AccessControl>()), global_overcommit_tracker(&process_list), macros(std::make_unique<Macros>())
+        : access_control(std::make_unique<AccessControl>())
+        , global_overcommit_tracker(&process_list)
+        , macros(std::make_unique<Macros>())
     {
         /// TODO: make it singleton (?)
         static std::atomic<size_t> num_calls{0};
@@ -573,8 +523,7 @@ struct ContextSharedPart : boost::noncopyable
         access_control->setExternalAuthenticatorsConfig(*config_value);
     }
 
-    const Poco::Util::AbstractConfiguration & getConfigRefWithLock(const std::lock_guard<ContextSharedMutex> &) const
-        TSA_REQUIRES(this->mutex)
+    const Poco::Util::AbstractConfiguration & getConfigRefWithLock(const std::lock_guard<ContextSharedMutex> &) const TSA_REQUIRES(this->mutex)
     {
         return config ? *config : Poco::Util::Application::instance().config();
     }
@@ -738,7 +687,10 @@ struct ContextSharedPart : boost::noncopyable
         total_memory_tracker.resetOvercommitTracker();
     }
 
-    bool hasTraceCollector() const { return trace_collector.has_value(); }
+    bool hasTraceCollector() const
+    {
+        return trace_collector.has_value();
+    }
 
     void initializeTraceCollector(std::shared_ptr<TraceLog> trace_log)
     {
@@ -804,22 +756,16 @@ ContextData::ContextData() = default;
 ContextData::ContextData(const ContextData &) = default;
 
 Context::Context() = default;
-Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this<Context>(rhs)
-{
-}
+Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this<Context>(rhs) {}
 
 SharedContextHolder::SharedContextHolder(SharedContextHolder &&) noexcept = default;
 SharedContextHolder & SharedContextHolder::operator=(SharedContextHolder &&) noexcept = default;
 SharedContextHolder::SharedContextHolder() = default;
 SharedContextHolder::~SharedContextHolder() = default;
-SharedContextHolder::SharedContextHolder(std::unique_ptr<ContextSharedPart> shared_context) : shared(std::move(shared_context))
-{
-}
+SharedContextHolder::SharedContextHolder(std::unique_ptr<ContextSharedPart> shared_context)
+    : shared(std::move(shared_context)) {}
 
-void SharedContextHolder::reset()
-{
-    shared.reset();
-}
+void SharedContextHolder::reset() { shared.reset(); }
 
 ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part)
 {
@@ -862,57 +808,21 @@ ContextMutablePtr Context::createCopy(const ContextMutablePtr & other)
 
 Context::~Context() = default;
 
-InterserverIOHandler & Context::getInterserverIOHandler()
-{
-    return shared->interserver_io_handler;
-}
-const InterserverIOHandler & Context::getInterserverIOHandler() const
-{
-    return shared->interserver_io_handler;
-}
+InterserverIOHandler & Context::getInterserverIOHandler() { return shared->interserver_io_handler; }
+const InterserverIOHandler & Context::getInterserverIOHandler() const { return shared->interserver_io_handler; }
 
-ProcessList & Context::getProcessList()
-{
-    return shared->process_list;
-}
-const ProcessList & Context::getProcessList() const
-{
-    return shared->process_list;
-}
-OvercommitTracker * Context::getGlobalOvercommitTracker() const
-{
-    return &shared->global_overcommit_tracker;
-}
+ProcessList & Context::getProcessList() { return shared->process_list; }
+const ProcessList & Context::getProcessList() const { return shared->process_list; }
+OvercommitTracker * Context::getGlobalOvercommitTracker() const { return &shared->global_overcommit_tracker; }
 
-SessionTracker & Context::getSessionTracker()
-{
-    return shared->session_tracker;
-}
+SessionTracker & Context::getSessionTracker() { return shared->session_tracker; }
 
-MergeList & Context::getMergeList()
-{
-    return shared->merge_list;
-}
-const MergeList & Context::getMergeList() const
-{
-    return shared->merge_list;
-}
-MovesList & Context::getMovesList()
-{
-    return shared->moves_list;
-}
-const MovesList & Context::getMovesList() const
-{
-    return shared->moves_list;
-}
-ReplicatedFetchList & Context::getReplicatedFetchList()
-{
-    return shared->replicated_fetch_list;
-}
-const ReplicatedFetchList & Context::getReplicatedFetchList() const
-{
-    return shared->replicated_fetch_list;
-}
+MergeList & Context::getMergeList() { return shared->merge_list; }
+const MergeList & Context::getMergeList() const { return shared->merge_list; }
+MovesList & Context::getMovesList() { return shared->moves_list; }
+const MovesList & Context::getMovesList() const { return shared->moves_list; }
+ReplicatedFetchList & Context::getReplicatedFetchList() { return shared->replicated_fetch_list; }
+const ReplicatedFetchList & Context::getReplicatedFetchList() const { return shared->replicated_fetch_list; }
 
 String Context::resolveDatabase(const String & database_name) const
 {
@@ -964,21 +874,20 @@ Strings Context::getWarnings() const
     {
         SharedLockGuard lock(shared->mutex);
         common_warnings = shared->warnings;
-
         if (CurrentMetrics::get(CurrentMetrics::AttachedTable) > static_cast<DB::Int64>(shared->max_table_num_to_warn))
             common_warnings.emplace_back(fmt::format("The number of attached tables is more than {}", shared->max_table_num_to_warn));
-
         if (CurrentMetrics::get(CurrentMetrics::AttachedDatabase) > static_cast<DB::Int64>(shared->max_database_num_to_warn))
             common_warnings.emplace_back(fmt::format("The number of attached databases is more than {}", shared->max_table_num_to_warn));
-
         if (CurrentMetrics::get(CurrentMetrics::PartsActive) > static_cast<DB::Int64>(shared->max_part_num_to_warn))
             common_warnings.emplace_back(fmt::format("The number of active parts is more than {}", shared->max_part_num_to_warn));
     }
     /// Make setting's name ordered
     std::set<String> obsolete_settings;
     for (const auto & setting : settings)
+    {
         if (setting.isValueChanged() && setting.isObsolete())
             obsolete_settings.emplace(setting.getName());
+    }
 
     if (!obsolete_settings.empty())
     {
@@ -994,8 +903,7 @@ Strings Context::getWarnings() const
         }
         res = res + "]" + (single_element ? " is" : " are")
             + " changed. "
-              "Please check 'SELECT * FROM system.settings WHERE changed AND is_obsolete' and read the changelog at "
-              "https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md";
+              "Please check 'SELECT * FROM system.settings WHERE changed AND is_obsolete' and read the changelog at https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md";
         common_warnings.emplace_back(res);
     }
 
@@ -1093,12 +1001,9 @@ try
 }
 catch (...)
 {
-    DB::tryLogCurrentException(
-        log,
-        fmt::format(
-            "Caught exception while setup temporary path: {}. "
-            "It is ok to skip this exception as cleaning old temporary files is not necessary",
-            path));
+    DB::tryLogCurrentException(log, fmt::format(
+        "Caught exception while setup temporary path: {}. "
+        "It is ok to skip this exception as cleaning old temporary files is not necessary", path));
 }
 
 static VolumePtr createLocalSingleDiskVolume(const std::string & path, const Poco::Util::AbstractConfiguration & config_)
@@ -1122,7 +1027,9 @@ void Context::setTemporaryStoragePath(const String & path, size_t max_size)
     VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, shared->getConfigRefWithLock(lock));
 
     for (const auto & disk : volume->getDisks())
+    {
         setupTmpPath(shared->log, disk->getPath());
+    }
 
     shared->root_temp_data_on_disk = std::make_shared<TemporaryDataOnDiskScope>(volume, max_size);
 }
@@ -1138,15 +1045,13 @@ void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_s
     }
 
     if (tmp_policy->getVolumes().size() != 1)
-        throw Exception(
-            ErrorCodes::NO_ELEMENTS_IN_CONFIG,
-            "Policy '{}' is used temporary files, such policy should have exactly one volume",
-            policy_name);
+        throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
+            "Policy '{}' is used temporary files, such policy should have exactly one volume", policy_name);
 
     VolumePtr volume = tmp_policy->getVolume(0);
 
     if (volume->getDisks().empty())
-        throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No disks volume for temporary files");
+         throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No disks volume for temporary files");
 
     for (const auto & disk : volume->getDisks())
     {
@@ -1159,11 +1064,9 @@ void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_s
         if (dynamic_cast<const DiskLocal *>(disk_ptr.get()) == nullptr)
         {
             const auto * disk_raw_ptr = disk_ptr.get();
-            throw Exception(
-                ErrorCodes::NO_ELEMENTS_IN_CONFIG,
+            throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
                 "Disk '{}' ({}) is not local and can't be used for temporary files",
-                disk_ptr->getName(),
-                typeid(*disk_raw_ptr).name());
+                disk_ptr->getName(), typeid(*disk_raw_ptr).name());
         }
 
         setupTmpPath(shared->log, disk->getPath());
@@ -1288,11 +1191,9 @@ void Context::setUser(const UUID & user_id_, const std::optional<const std::vect
     auto & access_control = getAccessControl();
     auto user = access_control.read<User>(user_id_);
 
-    auto new_current_roles
-        = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles);
+    auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles);
     auto enabled_roles = access_control.getEnabledRolesInfo(new_current_roles, {});
-    auto enabled_profiles = access_control.getEnabledSettingsInfo(
-        user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles);
+    auto enabled_profiles = access_control.getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles);
     const auto & database = user->default_database;
 
     /// Apply user's profiles, constraints, settings, roles.
@@ -1383,55 +1284,18 @@ void Context::checkAccessImpl(const Args &... args) const
     return getAccess()->checkAccess(args...);
 }
 
-void Context::checkAccess(const AccessFlags & flags) const
-{
-    return checkAccessImpl(flags);
-}
-void Context::checkAccess(const AccessFlags & flags, std::string_view database) const
-{
-    return checkAccessImpl(flags, database);
-}
-void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const
-{
-    return checkAccessImpl(flags, database, table);
-}
-void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const
-{
-    return checkAccessImpl(flags, database, table, column);
-}
-void Context::checkAccess(
-    const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const
-{
-    return checkAccessImpl(flags, database, table, columns);
-}
-void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const
-{
-    return checkAccessImpl(flags, database, table, columns);
-}
-void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id) const
-{
-    checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName());
-}
-void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, std::string_view column) const
-{
-    checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), column);
-}
-void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, const std::vector<std::string_view> & columns) const
-{
-    checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), columns);
-}
-void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, const Strings & columns) const
-{
-    checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), columns);
-}
-void Context::checkAccess(const AccessRightsElement & element) const
-{
-    return checkAccessImpl(element);
-}
-void Context::checkAccess(const AccessRightsElements & elements) const
-{
-    return checkAccessImpl(elements);
-}
+void Context::checkAccess(const AccessFlags & flags) const { return checkAccessImpl(flags); }
+void Context::checkAccess(const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(flags, database); }
+void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(flags, database, table); }
+void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(flags, database, table, column); }
+void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const { return checkAccessImpl(flags, database, table, columns); }
+void Context::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(flags, database, table, columns); }
+void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName()); }
+void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, std::string_view column) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), column); }
+void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, const std::vector<std::string_view> & columns) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), columns); }
+void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, const Strings & columns) const { checkAccessImpl(flags, table_id.getDatabaseName(), table_id.getTableName(), columns); }
+void Context::checkAccess(const AccessRightsElement & element) const { return checkAccessImpl(element); }
+void Context::checkAccess(const AccessRightsElements & elements) const { return checkAccessImpl(elements); }
 
 std::shared_ptr<const ContextAccess> Context::getAccess() const
 {
@@ -1441,8 +1305,7 @@ std::shared_ptr<const ContextAccess> Context::getAccess() const
         /// If setUserID() was never called then this must be the global context with the full access.
         bool full_access = !user_id;
 
-        return ContextAccessParams{
-            user_id, full_access, /* use_default_roles= */ false, current_roles, settings, current_database, client_info};
+        return ContextAccessParams{user_id, full_access, /* use_default_roles= */ false, current_roles, settings, current_database, client_info};
     };
 
     /// Check if the current access rights are still valid, otherwise get parameters for recalculating access rights.
@@ -1497,8 +1360,7 @@ std::optional<QuotaUsage> Context::getQuotaUsage() const
     return getAccess()->getQuotaUsage();
 }
 
-void Context::setCurrentProfileWithLock(
-    const String & profile_name, bool check_constraints, const std::lock_guard<ContextSharedMutex> & lock)
+void Context::setCurrentProfileWithLock(const String & profile_name, bool check_constraints, const std::lock_guard<ContextSharedMutex> & lock)
 {
     try
     {
@@ -1518,8 +1380,7 @@ void Context::setCurrentProfileWithLock(const UUID & profile_id, bool check_cons
     setCurrentProfilesWithLock(*profile_info, check_constraints, lock);
 }
 
-void Context::setCurrentProfilesWithLock(
-    const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::lock_guard<ContextSharedMutex> & lock)
+void Context::setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::lock_guard<ContextSharedMutex> & lock)
 {
     if (check_constraints)
         checkSettingsConstraintsWithLock(profiles_info.settings, SettingSource::PROFILE);
@@ -1560,10 +1421,9 @@ std::vector<UUID> Context::getEnabledProfiles() const
 
 ResourceManagerPtr Context::getResourceManager() const
 {
-    callOnce(
-        shared->resource_manager_initialized,
-        [&]
-        { shared->resource_manager = ResourceManagerFactory::instance().get(getConfigRef().getString("resource_manager", "dynamic")); });
+    callOnce(shared->resource_manager_initialized, [&] {
+        shared->resource_manager = ResourceManagerFactory::instance().get(getConfigRef().getString("resource_manager", "dynamic"));
+    });
 
     return shared->resource_manager;
 }
@@ -1831,18 +1691,17 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
             ASTCreateQuery create;
             create.select = query->as<ASTSelectWithUnionQuery>();
             auto sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(query, getQueryContext());
-            auto res = std::make_shared<StorageView>(
-                StorageID(database_name, table_name),
-                create,
-                ColumnsDescription(sample_block.getNamesAndTypesList()),
-                /* comment */ "",
-                /* is_parameterized_view */ true);
+            auto res = std::make_shared<StorageView>(StorageID(database_name, table_name),
+                                                     create,
+                                                     ColumnsDescription(sample_block.getNamesAndTypesList()),
+                                                     /* comment */ "",
+                                                     /* is_parameterized_view */ true);
             res->startup();
             function->prefer_subquery_to_function_formatting = true;
             return res;
         }
     }
-    auto hash = table_expression->getTreeHash(/*ignore_aliases=*/true);
+    auto hash = table_expression->getTreeHash(/*ignore_aliases=*/ true);
     auto key = toString(hash);
     StoragePtr & res = table_function_results[key];
     if (!res)
@@ -1855,19 +1714,21 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
         catch (Exception & e)
         {
             if (e.code() == ErrorCodes::UNKNOWN_FUNCTION)
+            {
                 e.addMessage(" or incorrect parameterized view");
+            }
             throw;
         }
 
-        uint64_t use_structure_from_insertion_table_in_table_functions
-            = getSettingsRef().use_structure_from_insertion_table_in_table_functions;
+        uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions;
         if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())
         {
-            const auto & insert_columns
-                = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
+            const auto & insert_columns = DatabaseCatalog::instance()
+                                              .getTable(getInsertionTable(), shared_from_this())
+                                              ->getInMemoryMetadataPtr()
+                                              ->getColumns();
 
-            const auto & insert_column_names
-                = hasInsertionTableColumnNames() ? *getInsertionTableColumnNames() : insert_columns.getOrdinary().getNames();
+            const auto & insert_column_names = hasInsertionTableColumnNames() ? *getInsertionTableColumnNames() : insert_columns.getOrdinary().getNames();
             DB::ColumnsDescription structure_hint;
 
             bool use_columns_from_insert_query = true;
@@ -1876,7 +1737,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
             /// insert table columns to table function columns through names from SELECT expression.
 
             auto insert_column_name_it = insert_column_names.begin();
-            auto insert_column_names_end = insert_column_names.end(); /// end iterator of the range covered by possible asterisk
+            auto insert_column_names_end = insert_column_names.end();  /// end iterator of the range covered by possible asterisk
             auto virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
             bool asterisk = false;
             const auto & expression_list = select_query_hint->select()->as<ASTExpressionList>()->children;
@@ -1893,8 +1754,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
                         if (asterisk)
                         {
                             if (use_structure_from_insertion_table_in_table_functions == 1)
-                                throw Exception(
-                                    ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
+                                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
 
                             use_columns_from_insert_query = false;
                             break;
@@ -1927,8 +1787,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
                     if (!structure_hint.empty())
                     {
                         if (use_structure_from_insertion_table_in_table_functions == 1)
-                            throw Exception(
-                                ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
+                            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
 
                         use_columns_from_insert_query = false;
                         break;
@@ -1966,8 +1825,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
             {
                 /// For input function we should check if input format supports reading subset of columns.
                 if (table_function_ptr->getName() == "input")
-                    use_columns_from_insert_query
-                        = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat(), shared_from_this());
+                    use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat(), shared_from_this());
                 else
                     use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(shared_from_this());
             }
@@ -1991,11 +1849,9 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
 
                     if (!structure_hint.empty())
                         table_function_ptr->setStructureHint(structure_hint);
-                }
-                else if (use_structure_from_insertion_table_in_table_functions == 1)
-                    throw Exception(
-                        ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH,
-                        "Number of columns in insert table less than required by SELECT expression.");
+
+                } else if (use_structure_from_insertion_table_in_table_functions == 1)
+                    throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns in insert table less than required by SELECT expression.");
             }
         }
 
@@ -2005,7 +1861,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
         ///
         ///     remote('127.1', system.one) -> remote('127.1', 'system.one'),
         ///
-        auto new_hash = table_expression->getTreeHash(/*ignore_aliases=*/true);
+        auto new_hash = table_expression->getTreeHash(/*ignore_aliases=*/ true);
         if (hash != new_hash)
         {
             key = toString(new_hash);
@@ -2017,12 +1873,14 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
 
 StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr)
 {
-    const auto hash = table_expression->getTreeHash(/*ignore_aliases=*/true);
+    const auto hash = table_expression->getTreeHash(/*ignore_aliases=*/ true);
     const auto key = toString(hash);
     StoragePtr & res = table_function_results[key];
 
     if (!res)
+    {
         res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName());
+    }
 
     return res;
 }
@@ -2031,8 +1889,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
 void Context::addViewSource(const StoragePtr & storage)
 {
     if (view_source)
-        throw Exception(
-            ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary view source storage {} already exists.", backQuoteIfNeed(view_source->getName()));
+        throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary view source storage {} already exists.",
+            backQuoteIfNeed(view_source->getName()));
     view_source = storage;
 }
 
@@ -2093,12 +1951,13 @@ void Context::applySettingChangeWithLock(const SettingChange & change, const std
     catch (Exception & e)
     {
         e.addMessage(fmt::format(
-            "in attempt to set the value of setting '{}' to {}", change.name, applyVisitor(FieldVisitorToString(), change.value)));
+                         "in attempt to set the value of setting '{}' to {}",
+                         change.name, applyVisitor(FieldVisitorToString(), change.value)));
         throw;
     }
 }
 
-void Context::applySettingsChangesWithLock(const SettingsChanges & changes, const std::lock_guard<ContextSharedMutex> & lock)
+void Context::applySettingsChangesWithLock(const SettingsChanges & changes, const std::lock_guard<ContextSharedMutex>& lock)
 {
     for (const SettingChange & change : changes)
         applySettingChangeWithLock(change, lock);
@@ -2126,7 +1985,8 @@ void Context::applySettingChange(const SettingChange & change)
     catch (Exception & e)
     {
         e.addMessage(fmt::format(
-            "in attempt to set the value of setting '{}' to {}", change.name, applyVisitor(FieldVisitorToString(), change.value)));
+                         "in attempt to set the value of setting '{}' to {}",
+                         change.name, applyVisitor(FieldVisitorToString(), change.value)));
         throw;
     }
 }
@@ -2163,8 +2023,7 @@ void Context::clampToSettingsConstraintsWithLock(SettingsChanges & changes, Sett
     getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.clamp(settings, changes, source);
 }
 
-void Context::checkMergeTreeSettingsConstraintsWithLock(
-    const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const
+void Context::checkMergeTreeSettingsConstraintsWithLock(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const
 {
     getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(merge_tree_settings, changes);
 }
@@ -2208,7 +2067,7 @@ void Context::checkMergeTreeSettingsConstraints(const MergeTreeSettings & merge_
 void Context::resetSettingsToDefaultValue(const std::vector<String> & names)
 {
     std::lock_guard lock(mutex);
-    for (const String & name : names)
+    for (const String & name: names)
         settings.setDefaultValue(name);
 }
 
@@ -2242,10 +2101,9 @@ String Context::getInitialQueryId() const
 void Context::setCurrentDatabaseNameInGlobalContext(const String & name)
 {
     if (!isGlobalContext())
-        throw Exception(
-            ErrorCodes::LOGICAL_ERROR,
-            "Cannot set current database for non global context, this method should "
-            "be used during server initialization");
+        throw Exception(ErrorCodes::LOGICAL_ERROR,
+                        "Cannot set current database for non global context, this method should "
+                        "be used during server initialization");
     std::lock_guard lock(mutex);
 
     if (!current_database.empty())
@@ -2289,12 +2147,13 @@ void Context::setCurrentQueryId(const String & query_id)
 
 
     String query_id_to_set = query_id;
-    if (query_id_to_set.empty()) /// If the user did not submit his query_id, then we generate it ourselves.
+    if (query_id_to_set.empty())    /// If the user did not submit his query_id, then we generate it ourselves.
     {
         /// Use protected constructor.
         struct QueryUUID : Poco::UUID
         {
-            QueryUUID(const char * bytes, Poco::UUID::Version version) : Poco::UUID(bytes, version) { }
+            QueryUUID(const char * bytes, Poco::UUID::Version version)
+                : Poco::UUID(bytes, version) {}
         };
 
         query_id_to_set = QueryUUID(random.bytes, Poco::UUID::UUID_RANDOM).toString();
@@ -2360,8 +2219,7 @@ void Context::setMacros(std::unique_ptr<Macros> && macros)
 ContextMutablePtr Context::getQueryContext() const
 {
     auto ptr = query_context.lock();
-    if (!ptr)
-        throw Exception(ErrorCodes::THERE_IS_NO_QUERY, "There is no query or query context has expired");
+    if (!ptr) throw Exception(ErrorCodes::THERE_IS_NO_QUERY, "There is no query or query context has expired");
     return ptr;
 }
 
@@ -2374,23 +2232,20 @@ bool Context::isInternalSubquery() const
 ContextMutablePtr Context::getSessionContext() const
 {
     auto ptr = session_context.lock();
-    if (!ptr)
-        throw Exception(ErrorCodes::THERE_IS_NO_SESSION, "There is no session or session context has expired");
+    if (!ptr) throw Exception(ErrorCodes::THERE_IS_NO_SESSION, "There is no session or session context has expired");
     return ptr;
 }
 
 ContextMutablePtr Context::getGlobalContext() const
 {
     auto ptr = global_context.lock();
-    if (!ptr)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired");
+    if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired");
     return ptr;
 }
 
 ContextMutablePtr Context::getBufferContext() const
 {
-    if (!buffer_context)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no buffer context");
+    if (!buffer_context) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no buffer context");
     return buffer_context;
 }
 
@@ -2486,11 +2341,11 @@ ExternalDictionariesLoader & Context::getExternalDictionariesLoader()
     return getExternalDictionariesLoaderWithLock(lock);
 }
 
-ExternalDictionariesLoader & Context::getExternalDictionariesLoaderWithLock(const std::lock_guard<std::mutex> &)
-    TSA_REQUIRES(shared->external_dictionaries_mutex)
+ExternalDictionariesLoader & Context::getExternalDictionariesLoaderWithLock(const std::lock_guard<std::mutex> &) TSA_REQUIRES(shared->external_dictionaries_mutex)
 {
     if (!shared->external_dictionaries_loader)
-        shared->external_dictionaries_loader = std::make_unique<ExternalDictionariesLoader>(getGlobalContext());
+        shared->external_dictionaries_loader =
+            std::make_unique<ExternalDictionariesLoader>(getGlobalContext());
     return *shared->external_dictionaries_loader;
 }
 
@@ -2506,12 +2361,11 @@ ExternalUserDefinedExecutableFunctionsLoader & Context::getExternalUserDefinedEx
 }
 
 ExternalUserDefinedExecutableFunctionsLoader &
-Context::getExternalUserDefinedExecutableFunctionsLoaderWithLock(const std::lock_guard<std::mutex> &)
-    TSA_REQUIRES(shared->external_user_defined_executable_functions_mutex)
+Context::getExternalUserDefinedExecutableFunctionsLoaderWithLock(const std::lock_guard<std::mutex> &) TSA_REQUIRES(shared->external_user_defined_executable_functions_mutex)
 {
     if (!shared->external_user_defined_executable_functions_loader)
-        shared->external_user_defined_executable_functions_loader
-            = std::make_unique<ExternalUserDefinedExecutableFunctionsLoader>(getGlobalContext());
+        shared->external_user_defined_executable_functions_loader =
+            std::make_unique<ExternalUserDefinedExecutableFunctionsLoader>(getGlobalContext());
     return *shared->external_user_defined_executable_functions_loader;
 }
 
@@ -2523,8 +2377,10 @@ EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool throw_on_
     {
         auto geo_dictionaries_loader = std::make_unique<GeoDictionariesLoader>();
 
-        shared->embedded_dictionaries
-            = std::make_unique<EmbeddedDictionaries>(std::move(geo_dictionaries_loader), getGlobalContext(), throw_on_error);
+        shared->embedded_dictionaries = std::make_unique<EmbeddedDictionaries>(
+            std::move(geo_dictionaries_loader),
+            getGlobalContext(),
+            throw_on_error);
     }
 
     return *shared->embedded_dictionaries;
@@ -2593,8 +2449,7 @@ void Context::loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::Abstr
     if (shared->user_defined_executable_functions_config_repository)
     {
         shared->user_defined_executable_functions_config_repository->updatePatterns(patterns);
-        external_user_defined_executable_functions_loader.reloadConfig(
-            shared->user_defined_executable_functions_config_repository->getName());
+        external_user_defined_executable_functions_loader.reloadConfig(shared->user_defined_executable_functions_config_repository->getName());
         return;
     }
 
@@ -2602,15 +2457,14 @@ void Context::loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::Abstr
     auto config_path = getConfigRef().getString("config-file", "config.xml");
     auto repository = std::make_unique<ExternalLoaderXMLConfigRepository>(app_path, config_path, patterns);
     shared->user_defined_executable_functions_config_repository = repository.get();
-    shared->user_defined_executable_functions_xmls
-        = external_user_defined_executable_functions_loader.addConfigRepository(std::move(repository));
+    shared->user_defined_executable_functions_xmls = external_user_defined_executable_functions_loader.addConfigRepository(std::move(repository));
 }
 
 const IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() const
 {
-    callOnce(
-        shared->user_defined_sql_objects_loader_initialized,
-        [&] { shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); });
+    callOnce(shared->user_defined_sql_objects_loader_initialized, [&] {
+        shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext());
+    });
 
     SharedLockGuard lock(shared->mutex);
     return *shared->user_defined_sql_objects_loader;
@@ -2618,9 +2472,9 @@ const IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() c
 
 IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader()
 {
-    callOnce(
-        shared->user_defined_sql_objects_loader_initialized,
-        [&] { shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); });
+    callOnce(shared->user_defined_sql_objects_loader_initialized, [&] {
+        shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext());
+    });
 
     SharedLockGuard lock(shared->mutex);
     return *shared->user_defined_sql_objects_loader;
@@ -2630,14 +2484,18 @@ IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader()
 
 SynonymsExtensions & Context::getSynonymsExtensions() const
 {
-    callOnce(shared->synonyms_extensions_initialized, [&] { shared->synonyms_extensions.emplace(getConfigRef()); });
+    callOnce(shared->synonyms_extensions_initialized, [&] {
+        shared->synonyms_extensions.emplace(getConfigRef());
+    });
 
     return *shared->synonyms_extensions;
 }
 
 Lemmatizers & Context::getLemmatizers() const
 {
-    callOnce(shared->lemmatizers_initialized, [&] { shared->lemmatizers.emplace(getConfigRef()); });
+    callOnce(shared->lemmatizers_initialized, [&] {
+        shared->lemmatizers.emplace(getConfigRef());
+    });
 
     return *shared->lemmatizers;
 }
@@ -2645,21 +2503,17 @@ Lemmatizers & Context::getLemmatizers() const
 
 BackupsWorker & Context::getBackupsWorker() const
 {
-    callOnce(
-        shared->backups_worker_initialized,
-        [&]
-        {
-            const auto & config = getConfigRef();
-            const bool allow_concurrent_backups = config.getBool("backups.allow_concurrent_backups", true);
-            const bool allow_concurrent_restores = config.getBool("backups.allow_concurrent_restores", true);
+    callOnce(shared->backups_worker_initialized, [&] {
+        const auto & config = getConfigRef();
+        const bool allow_concurrent_backups = config.getBool("backups.allow_concurrent_backups", true);
+        const bool allow_concurrent_restores = config.getBool("backups.allow_concurrent_restores", true);
 
-            const auto & settings_ref = getSettingsRef();
-            UInt64 backup_threads = config.getUInt64("backup_threads", settings_ref.backup_threads);
-            UInt64 restore_threads = config.getUInt64("restore_threads", settings_ref.restore_threads);
+        const auto & settings_ref = getSettingsRef();
+        UInt64 backup_threads = config.getUInt64("backup_threads", settings_ref.backup_threads);
+        UInt64 restore_threads = config.getUInt64("restore_threads", settings_ref.restore_threads);
 
-            shared->backups_worker.emplace(
-                getGlobalContext(), backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores);
-        });
+        shared->backups_worker.emplace(getGlobalContext(), backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores);
+    });
 
     return *shared->backups_worker;
 }
@@ -2774,21 +2628,13 @@ void Context::clearMarkCache() const
 
 ThreadPool & Context::getLoadMarksThreadpool() const
 {
-    callOnce(
-        shared->load_marks_threadpool_initialized,
-        [&]
-        {
-            const auto & config = getConfigRef();
-            auto pool_size = config.getUInt(".load_marks_threadpool_pool_size", 50);
-            auto queue_size = config.getUInt(".load_marks_threadpool_queue_size", 1000000);
-            shared->load_marks_threadpool = std::make_unique<ThreadPool>(
-                CurrentMetrics::MarksLoaderThreads,
-                CurrentMetrics::MarksLoaderThreadsActive,
-                CurrentMetrics::MarksLoaderThreadsScheduled,
-                pool_size,
-                pool_size,
-                queue_size);
-        });
+    callOnce(shared->load_marks_threadpool_initialized, [&] {
+        const auto & config = getConfigRef();
+        auto pool_size = config.getUInt(".load_marks_threadpool_pool_size", 50);
+        auto queue_size = config.getUInt(".load_marks_threadpool_queue_size", 1000000);
+        shared->load_marks_threadpool = std::make_unique<ThreadPool>(
+            CurrentMetrics::MarksLoaderThreads, CurrentMetrics::MarksLoaderThreadsActive, CurrentMetrics::MarksLoaderThreadsScheduled, pool_size, pool_size, queue_size);
+    });
 
     return *shared->load_marks_threadpool;
 }
@@ -2965,21 +2811,13 @@ void Context::clearCaches() const
 
 ThreadPool & Context::getPrefetchThreadpool() const
 {
-    callOnce(
-        shared->prefetch_threadpool_initialized,
-        [&]
-        {
-            const auto & config = getConfigRef();
-            auto pool_size = config.getUInt(".prefetch_threadpool_pool_size", 100);
-            auto queue_size = config.getUInt(".prefetch_threadpool_queue_size", 1000000);
-            shared->prefetch_threadpool = std::make_unique<ThreadPool>(
-                CurrentMetrics::IOPrefetchThreads,
-                CurrentMetrics::IOPrefetchThreadsActive,
-                CurrentMetrics::IOPrefetchThreadsScheduled,
-                pool_size,
-                pool_size,
-                queue_size);
-        });
+    callOnce(shared->prefetch_threadpool_initialized, [&] {
+        const auto & config = getConfigRef();
+        auto pool_size = config.getUInt(".prefetch_threadpool_pool_size", 100);
+        auto queue_size = config.getUInt(".prefetch_threadpool_queue_size", 1000000);
+        shared->prefetch_threadpool = std::make_unique<ThreadPool>(
+            CurrentMetrics::IOPrefetchThreads, CurrentMetrics::IOPrefetchThreadsActive, CurrentMetrics::IOPrefetchThreadsScheduled, pool_size, pool_size, queue_size);
+    });
 
     return *shared->prefetch_threadpool;
 }
@@ -2992,16 +2830,13 @@ size_t Context::getPrefetchThreadpoolSize() const
 
 BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const
 {
-    callOnce(
-        shared->buffer_flush_schedule_pool_initialized,
-        [&]
-        {
-            shared->buffer_flush_schedule_pool = std::make_unique<BackgroundSchedulePool>(
-                shared->server_settings.background_buffer_flush_schedule_pool_size,
-                CurrentMetrics::BackgroundBufferFlushSchedulePoolTask,
-                CurrentMetrics::BackgroundBufferFlushSchedulePoolSize,
-                "BgBufSchPool");
-        });
+    callOnce(shared->buffer_flush_schedule_pool_initialized, [&] {
+        shared->buffer_flush_schedule_pool = std::make_unique<BackgroundSchedulePool>(
+            shared->server_settings.background_buffer_flush_schedule_pool_size,
+            CurrentMetrics::BackgroundBufferFlushSchedulePoolTask,
+            CurrentMetrics::BackgroundBufferFlushSchedulePoolSize,
+            "BgBufSchPool");
+    });
 
     return *shared->buffer_flush_schedule_pool;
 }
@@ -3013,16 +2848,11 @@ BackgroundTaskSchedulingSettings Context::getBackgroundProcessingTaskSchedulingS
     const auto & config = getConfigRef();
     task_settings.thread_sleep_seconds = config.getDouble("background_processing_pool_thread_sleep_seconds", 10);
     task_settings.thread_sleep_seconds_random_part = config.getDouble("background_processing_pool_thread_sleep_seconds_random_part", 1.0);
-    task_settings.thread_sleep_seconds_if_nothing_to_do
-        = config.getDouble("background_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1);
-    task_settings.task_sleep_seconds_when_no_work_min
-        = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_min", 10);
-    task_settings.task_sleep_seconds_when_no_work_max
-        = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_max", 600);
-    task_settings.task_sleep_seconds_when_no_work_multiplier
-        = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1);
-    task_settings.task_sleep_seconds_when_no_work_random_part
-        = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0);
+    task_settings.thread_sleep_seconds_if_nothing_to_do = config.getDouble("background_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1);
+    task_settings.task_sleep_seconds_when_no_work_min = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_min", 10);
+    task_settings.task_sleep_seconds_when_no_work_max = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_max", 600);
+    task_settings.task_sleep_seconds_when_no_work_multiplier = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1);
+    task_settings.task_sleep_seconds_when_no_work_random_part = config.getDouble("background_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0);
     return task_settings;
 }
 
@@ -3032,66 +2862,51 @@ BackgroundTaskSchedulingSettings Context::getBackgroundMoveTaskSchedulingSetting
 
     const auto & config = getConfigRef();
     task_settings.thread_sleep_seconds = config.getDouble("background_move_processing_pool_thread_sleep_seconds", 10);
-    task_settings.thread_sleep_seconds_random_part
-        = config.getDouble("background_move_processing_pool_thread_sleep_seconds_random_part", 1.0);
-    task_settings.thread_sleep_seconds_if_nothing_to_do
-        = config.getDouble("background_move_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1);
-    task_settings.task_sleep_seconds_when_no_work_min
-        = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_min", 10);
-    task_settings.task_sleep_seconds_when_no_work_max
-        = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_max", 600);
-    task_settings.task_sleep_seconds_when_no_work_multiplier
-        = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1);
-    task_settings.task_sleep_seconds_when_no_work_random_part
-        = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0);
+    task_settings.thread_sleep_seconds_random_part = config.getDouble("background_move_processing_pool_thread_sleep_seconds_random_part", 1.0);
+    task_settings.thread_sleep_seconds_if_nothing_to_do = config.getDouble("background_move_processing_pool_thread_sleep_seconds_if_nothing_to_do", 0.1);
+    task_settings.task_sleep_seconds_when_no_work_min = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_min", 10);
+    task_settings.task_sleep_seconds_when_no_work_max = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_max", 600);
+    task_settings.task_sleep_seconds_when_no_work_multiplier = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_multiplier", 1.1);
+    task_settings.task_sleep_seconds_when_no_work_random_part = config.getDouble("background_move_processing_pool_task_sleep_seconds_when_no_work_random_part", 1.0);
 
     return task_settings;
 }
 
 BackgroundSchedulePool & Context::getSchedulePool() const
 {
-    callOnce(
-        shared->schedule_pool_initialized,
-        [&]
-        {
-            shared->schedule_pool = std::make_unique<BackgroundSchedulePool>(
-                shared->server_settings.background_schedule_pool_size,
-                CurrentMetrics::BackgroundSchedulePoolTask,
-                CurrentMetrics::BackgroundSchedulePoolSize,
-                "BgSchPool");
-        });
+    callOnce(shared->schedule_pool_initialized, [&] {
+        shared->schedule_pool = std::make_unique<BackgroundSchedulePool>(
+            shared->server_settings.background_schedule_pool_size,
+            CurrentMetrics::BackgroundSchedulePoolTask,
+            CurrentMetrics::BackgroundSchedulePoolSize,
+            "BgSchPool");
+    });
 
     return *shared->schedule_pool;
 }
 
 BackgroundSchedulePool & Context::getDistributedSchedulePool() const
 {
-    callOnce(
-        shared->distributed_schedule_pool_initialized,
-        [&]
-        {
-            shared->distributed_schedule_pool = std::make_unique<BackgroundSchedulePool>(
-                shared->server_settings.background_distributed_schedule_pool_size,
-                CurrentMetrics::BackgroundDistributedSchedulePoolTask,
-                CurrentMetrics::BackgroundDistributedSchedulePoolSize,
-                "BgDistSchPool");
-        });
+    callOnce(shared->distributed_schedule_pool_initialized, [&] {
+        shared->distributed_schedule_pool = std::make_unique<BackgroundSchedulePool>(
+            shared->server_settings.background_distributed_schedule_pool_size,
+            CurrentMetrics::BackgroundDistributedSchedulePoolTask,
+            CurrentMetrics::BackgroundDistributedSchedulePoolSize,
+            "BgDistSchPool");
+    });
 
     return *shared->distributed_schedule_pool;
 }
 
 BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const
 {
-    callOnce(
-        shared->message_broker_schedule_pool_initialized,
-        [&]
-        {
-            shared->message_broker_schedule_pool = std::make_unique<BackgroundSchedulePool>(
-                shared->server_settings.background_message_broker_schedule_pool_size,
-                CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask,
-                CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize,
-                "BgMBSchPool");
-        });
+    callOnce(shared->message_broker_schedule_pool_initialized, [&] {
+        shared->message_broker_schedule_pool = std::make_unique<BackgroundSchedulePool>(
+            shared->server_settings.background_message_broker_schedule_pool_size,
+            CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask,
+            CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize,
+            "BgMBSchPool");
+    });
 
     return *shared->message_broker_schedule_pool;
 }
@@ -3290,10 +3105,7 @@ bool Context::tryCheckClientConnectionToMyKeeperCluster() const
             {
                 if (checkZooKeeperConfigIsLocal(getConfigRef(), "auxiliary_zookeepers." + aux_zk_name))
                 {
-                    LOG_DEBUG(
-                        shared->log,
-                        "Our Keeper server is participant of the auxiliary zookeeper cluster ({}), will try to connect to it",
-                        aux_zk_name);
+                    LOG_DEBUG(shared->log, "Our Keeper server is participant of the auxiliary zookeeper cluster ({}), will try to connect to it", aux_zk_name);
                     getAuxiliaryZooKeeper(aux_zk_name);
                     /// Connected, return true
                     return true;
@@ -3366,17 +3178,13 @@ void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) cons
         if (start_async)
         {
             assert(!is_standalone_app);
-            LOG_INFO(
-                shared->log,
-                "Connected to ZooKeeper (or Keeper) before internal Keeper start or we don't depend on our Keeper cluster, "
-                "will wait for Keeper asynchronously");
+            LOG_INFO(shared->log, "Connected to ZooKeeper (or Keeper) before internal Keeper start or we don't depend on our Keeper cluster, "
+                     "will wait for Keeper asynchronously");
         }
         else
         {
-            LOG_INFO(
-                shared->log,
-                "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start, "
-                "will wait for Keeper synchronously");
+            LOG_INFO(shared->log, "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start, "
+                     "will wait for Keeper synchronously");
         }
 
         shared->keeper_dispatcher = std::make_shared<KeeperDispatcher>();
@@ -3445,9 +3253,8 @@ zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const
                 "config.xml",
                 name);
 
-        zookeeper = shared->auxiliary_zookeepers
-                        .emplace(name, std::make_shared<zkutil::ZooKeeper>(config, "auxiliary_zookeepers." + name, getZooKeeperLog()))
-                        .first;
+        zookeeper = shared->auxiliary_zookeepers.emplace(name,
+                        std::make_shared<zkutil::ZooKeeper>(config, "auxiliary_zookeepers." + name, getZooKeeperLog())).first;
     }
     else if (zookeeper->second->expired())
         zookeeper->second = zookeeper->second->startNewSession();
@@ -3554,12 +3361,11 @@ void Context::setInterserverIOAddress(const String & host, UInt16 port)
 std::pair<String, UInt16> Context::getInterserverIOAddress() const
 {
     if (shared->interserver_io_host.empty() || shared->interserver_io_port == 0)
-        throw Exception(
-            ErrorCodes::NO_ELEMENTS_IN_CONFIG,
-            "Parameter 'interserver_http(s)_port' required for replication is not specified "
-            "in configuration file.");
+        throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
+                        "Parameter 'interserver_http(s)_port' required for replication is not specified "
+                        "in configuration file.");
 
-    return {shared->interserver_io_host, shared->interserver_io_port};
+    return { shared->interserver_io_host, shared->interserver_io_port };
 }
 
 void Context::setInterserverScheme(const String & scheme)
@@ -3633,13 +3439,13 @@ void Context::setMaxPartNumToWarn(size_t max_part_to_warn)
 void Context::setMaxTableNumToWarn(size_t max_table_to_warn)
 {
     SharedLockGuard lock(shared->mutex);
-    shared->max_table_num_to_warn = max_table_to_warn;
+    shared->max_table_num_to_warn= max_table_to_warn;
 }
 
 void Context::setMaxDatabaseNumToWarn(size_t max_database_to_warn)
 {
     SharedLockGuard lock(shared->mutex);
-    shared->max_database_num_to_warn = max_database_to_warn;
+    shared->max_database_num_to_warn= max_database_to_warn;
 }
 
 std::shared_ptr<Cluster> Context::getCluster(const std::string & cluster_name) const
@@ -3735,7 +3541,9 @@ void Context::setClustersConfig(const ConfigurationPtr & config, bool enable_dis
 {
     std::lock_guard lock(shared->clusters_mutex);
     if (ConfigHelper::getBool(*config, "allow_experimental_cluster_discovery") && enable_discovery && !shared->cluster_discovery)
+    {
         shared->cluster_discovery = std::make_unique<ClusterDiscovery>(*config, getGlobalContext());
+    }
 
     /// Do not update clusters if this part of config wasn't changed.
     if (shared->clusters && isSameConfiguration(*config, *shared->clusters_config, config_name))
@@ -3768,14 +3576,11 @@ void Context::initializeSystemLogs()
     /// triggered from another thread, that is launched while initializing the system logs,
     /// for example, system.filesystem_cache_log will be triggered by parts loading
     /// of any other table if it is stored on a disk with cache.
-    callOnce(
-        shared->system_logs_initialized,
-        [&]
-        {
-            auto system_logs = std::make_unique<SystemLogs>(getGlobalContext(), getConfigRef());
-            std::lock_guard lock(shared->mutex);
-            shared->system_logs = std::move(system_logs);
-        });
+    callOnce(shared->system_logs_initialized, [&] {
+        auto system_logs = std::make_unique<SystemLogs>(getGlobalContext(), getConfigRef());
+        std::lock_guard lock(shared->mutex);
+        shared->system_logs = std::move(system_logs);
+    });
 }
 
 void Context::initializeTraceCollector()
@@ -4116,8 +3921,7 @@ DiskSelectorPtr Context::getDiskSelector(std::lock_guard<std::mutex> & /* lock *
     return shared->merge_tree_disk_selector;
 }
 
-StoragePolicySelectorPtr Context::getStoragePolicySelector(std::lock_guard<std::mutex> & lock) const
-    TSA_REQUIRES(shared->storage_policies_mutex)
+StoragePolicySelectorPtr Context::getStoragePolicySelector(std::lock_guard<std::mutex> & lock) const TSA_REQUIRES(shared->storage_policies_mutex)
 {
     if (!shared->merge_tree_storage_policy_selector)
     {
@@ -4149,9 +3953,7 @@ void Context::updateStorageConfiguration(const Poco::Util::AbstractConfiguration
             catch (Exception & e)
             {
                 LOG_ERROR(
-                    shared->log,
-                    "An error has occurred while reloading storage policies, storage policies were not applied: {}",
-                    e.message());
+                    shared->log, "An error has occurred while reloading storage policies, storage policies were not applied: {}", e.message());
             }
         }
 
@@ -4167,6 +3969,7 @@ void Context::updateStorageConfiguration(const Poco::Util::AbstractConfiguration
         if (shared->storage_s3_settings)
             shared->storage_s3_settings->loadFromConfig("s3", config, getSettingsRef());
     }
+
 }
 
 
@@ -4238,24 +4041,19 @@ void Context::checkCanBeDropped(const String & database, const String & table, c
 
     String size_str = formatReadableSizeWithDecimalSuffix(size);
     String max_size_to_drop_str = formatReadableSizeWithDecimalSuffix(max_size_to_drop);
-    throw Exception(
-        ErrorCodes::TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT,
-        "Table or Partition in {}.{} was not dropped.\nReason:\n"
-        "1. Size ({}) is greater than max_[table/partition]_size_to_drop ({})\n"
-        "2. File '{}' intended to force DROP {}\n"
-        "How to fix this:\n"
-        "1. Either increase (or set to zero) max_[table/partition]_size_to_drop in server config\n"
-        "2. Either create forcing file {} and make sure that ClickHouse has write permission for it.\n"
-        "Example:\nsudo touch '{}' && sudo chmod 666 '{}'",
-        backQuoteIfNeed(database),
-        backQuoteIfNeed(table),
-        size_str,
-        max_size_to_drop_str,
-        force_file.string(),
-        force_file_exists ? "exists but not writeable (could not be removed)" : "doesn't exist",
-        force_file.string(),
-        force_file.string(),
-        force_file.string());
+    throw Exception(ErrorCodes::TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT,
+                    "Table or Partition in {}.{} was not dropped.\nReason:\n"
+                    "1. Size ({}) is greater than max_[table/partition]_size_to_drop ({})\n"
+                    "2. File '{}' intended to force DROP {}\n"
+                    "How to fix this:\n"
+                    "1. Either increase (or set to zero) max_[table/partition]_size_to_drop in server config\n"
+                    "2. Either create forcing file {} and make sure that ClickHouse has write permission for it.\n"
+                    "Example:\nsudo touch '{}' && sudo chmod 666 '{}'",
+                    backQuoteIfNeed(database), backQuoteIfNeed(table),
+                    size_str, max_size_to_drop_str,
+                    force_file.string(), force_file_exists ? "exists but not writeable (could not be removed)" : "doesn't exist",
+                    force_file.string(),
+                    force_file.string(), force_file.string());
 }
 
 
@@ -4287,7 +4085,7 @@ void Context::setClientHTTPHeaderForbiddenHeaders(const String & forbidden_heade
 
 void Context::setAllowGetHTTPHeaderFunction(bool allow_get_http_header_function)
 {
-    shared->allow_get_client_http_header = allow_get_http_header_function;
+    shared->allow_get_client_http_header= allow_get_http_header_function;
 }
 
 const std::unordered_set<String> & Context::getClientHTTPHeaderForbiddenHeaders() const
@@ -4319,13 +4117,7 @@ void Context::checkPartitionCanBeDropped(const String & database, const String &
 }
 
 
-InputFormatPtr Context::getInputFormat(
-    const String & name,
-    ReadBuffer & buf,
-    const Block & sample,
-    UInt64 max_block_size,
-    const std::optional<FormatSettings> & format_settings,
-    const std::optional<size_t> max_parsing_threads) const
+InputFormatPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, const std::optional<FormatSettings> & format_settings, const std::optional<size_t> max_parsing_threads) const
 {
     return FormatFactory::instance().getInput(name, buf, sample, shared_from_this(), max_block_size, format_settings, max_parsing_threads);
 }
@@ -4514,9 +4306,9 @@ const IHostContextPtr & Context::getHostContext() const
 
 std::shared_ptr<ActionLocksManager> Context::getActionLocksManager() const
 {
-    callOnce(
-        shared->action_locks_manager_initialized,
-        [&] { shared->action_locks_manager = std::make_shared<ActionLocksManager>(shared_from_this()); });
+    callOnce(shared->action_locks_manager_initialized, [&] {
+        shared->action_locks_manager = std::make_shared<ActionLocksManager>(shared_from_this());
+    });
 
     return shared->action_locks_manager;
 }
@@ -4603,8 +4395,7 @@ void Context::setClientInterface(ClientInfo::Interface interface)
     need_recalculate_access = true;
 }
 
-void Context::setClientVersion(
-    UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
+void Context::setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
 {
     client_info.client_version_major = client_version_major;
     client_info.client_version_minor = client_version_minor;
@@ -4617,11 +4408,7 @@ void Context::setClientConnectionId(uint32_t connection_id_)
     client_info.connection_id = connection_id_;
 }
 
-void Context::setHttpClientInfo(
-    ClientInfo::HTTPMethod http_method,
-    const String & http_user_agent,
-    const String & http_referer,
-    const Poco::Net::NameValueCollection & http_headers)
+void Context::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer, const Poco::Net::NameValueCollection & http_headers)
 {
     client_info.http_method = http_method;
     client_info.http_user_agent = http_user_agent;
@@ -4696,8 +4483,7 @@ void Context::setQuotaClientKey(const String & quota_key_)
     need_recalculate_access = true;
 }
 
-void Context::setConnectionClientVersion(
-    UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
+void Context::setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
 {
     client_info.connection_client_version_major = client_version_major;
     client_info.connection_client_version_minor = client_version_minor;
@@ -4778,12 +4564,10 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w
     if (!storage_id.database_name.empty())
     {
         if (in_specified_database)
-            return storage_id; /// NOTE There is no guarantees that table actually exists in database.
+            return storage_id;     /// NOTE There is no guarantees that table actually exists in database.
         if (exception)
-            exception->emplace(Exception(
-                ErrorCodes::UNKNOWN_TABLE,
-                "External and temporary tables have no database, but {} is specified",
-                storage_id.database_name));
+            exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "External and temporary tables have no database, but {} is specified",
+                               storage_id.database_name));
         return StorageID::createEmpty();
     }
 
@@ -4868,11 +4652,9 @@ void Context::checkTransactionsAreAllowed(bool explicit_tcl_query /* = false */)
     if (explicit_tcl_query)
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Transactions are not supported");
 
-    throw Exception(
-        ErrorCodes::LOGICAL_ERROR,
-        "Experimental support for transactions is disabled, "
-        "however, some query or background task tried to access TransactionLog. "
-        "If you have not enabled this feature explicitly, then it's a bug.");
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Experimental support for transactions is disabled, "
+                    "however, some query or background task tried to access TransactionLog. "
+                    "If you have not enabled this feature explicitly, then it's a bug.");
 }
 
 void Context::initCurrentTransaction(MergeTreeTransactionPtr txn)
@@ -5033,55 +4815,47 @@ void Context::initializeBackgroundExecutorsIfNeeded()
     size_t background_common_pool_size = server_settings.background_common_pool_size;
 
     /// With this executor we can execute more tasks than threads we have
-    shared->merge_mutate_executor = std::make_shared<MergeMutateBackgroundExecutor>(
+    shared->merge_mutate_executor = std::make_shared<MergeMutateBackgroundExecutor>
+    (
         "MergeMutate",
-        /*max_threads_count*/ background_pool_size,
-        /*max_tasks_count*/ background_pool_max_tasks_count,
+        /*max_threads_count*/background_pool_size,
+        /*max_tasks_count*/background_pool_max_tasks_count,
         CurrentMetrics::BackgroundMergesAndMutationsPoolTask,
         CurrentMetrics::BackgroundMergesAndMutationsPoolSize,
-        background_merges_mutations_scheduling_policy);
-    LOG_INFO(
-        shared->log,
-        "Initialized background executor for merges and mutations with num_threads={}, num_tasks={}, scheduling_policy={}",
-        background_pool_size,
-        background_pool_max_tasks_count,
-        background_merges_mutations_scheduling_policy);
+        background_merges_mutations_scheduling_policy
+    );
+    LOG_INFO(shared->log, "Initialized background executor for merges and mutations with num_threads={}, num_tasks={}, scheduling_policy={}",
+        background_pool_size, background_pool_max_tasks_count, background_merges_mutations_scheduling_policy);
 
-    shared->moves_executor = std::make_shared<OrdinaryBackgroundExecutor>(
+    shared->moves_executor = std::make_shared<OrdinaryBackgroundExecutor>
+    (
         "Move",
         background_move_pool_size,
         background_move_pool_size,
         CurrentMetrics::BackgroundMovePoolTask,
-        CurrentMetrics::BackgroundMovePoolSize);
-    LOG_INFO(
-        shared->log,
-        "Initialized background executor for move operations with num_threads={}, num_tasks={}",
-        background_move_pool_size,
-        background_move_pool_size);
+        CurrentMetrics::BackgroundMovePoolSize
+    );
+    LOG_INFO(shared->log, "Initialized background executor for move operations with num_threads={}, num_tasks={}", background_move_pool_size, background_move_pool_size);
 
-    shared->fetch_executor = std::make_shared<OrdinaryBackgroundExecutor>(
+    shared->fetch_executor = std::make_shared<OrdinaryBackgroundExecutor>
+    (
         "Fetch",
         background_fetches_pool_size,
         background_fetches_pool_size,
         CurrentMetrics::BackgroundFetchesPoolTask,
-        CurrentMetrics::BackgroundFetchesPoolSize);
-    LOG_INFO(
-        shared->log,
-        "Initialized background executor for fetches with num_threads={}, num_tasks={}",
-        background_fetches_pool_size,
-        background_fetches_pool_size);
+        CurrentMetrics::BackgroundFetchesPoolSize
+    );
+    LOG_INFO(shared->log, "Initialized background executor for fetches with num_threads={}, num_tasks={}", background_fetches_pool_size, background_fetches_pool_size);
 
-    shared->common_executor = std::make_shared<OrdinaryBackgroundExecutor>(
+    shared->common_executor = std::make_shared<OrdinaryBackgroundExecutor>
+    (
         "Common",
         background_common_pool_size,
         background_common_pool_size,
         CurrentMetrics::BackgroundCommonPoolTask,
-        CurrentMetrics::BackgroundCommonPoolSize);
-    LOG_INFO(
-        shared->log,
-        "Initialized background executor for common operations (e.g. clearing old parts) with num_threads={}, num_tasks={}",
-        background_common_pool_size,
-        background_common_pool_size);
+        CurrentMetrics::BackgroundCommonPoolSize
+    );
+    LOG_INFO(shared->log, "Initialized background executor for common operations (e.g. clearing old parts) with num_threads={}, num_tasks={}", background_common_pool_size, background_common_pool_size);
 
     shared->are_background_executors_initialized = true;
 }
@@ -5118,15 +4892,12 @@ OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const
 
 IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const
 {
-    callOnce(
-        shared->readers_initialized,
-        [&]
-        {
-            const auto & config = getConfigRef();
-            shared->asynchronous_remote_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER, config);
-            shared->asynchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER, config);
-            shared->synchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER, config);
-        });
+    callOnce(shared->readers_initialized, [&] {
+        const auto & config = getConfigRef();
+        shared->asynchronous_remote_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER, config);
+        shared->asynchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER, config);
+        shared->synchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER, config);
+    });
 
     switch (type)
     {
@@ -5141,22 +4912,14 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co
 
 ThreadPool & Context::getThreadPoolWriter() const
 {
-    callOnce(
-        shared->threadpool_writer_initialized,
-        [&]
-        {
-            const auto & config = getConfigRef();
-            auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100);
-            auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000);
+    callOnce(shared->threadpool_writer_initialized, [&] {
+        const auto & config = getConfigRef();
+        auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100);
+        auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000);
 
-            shared->threadpool_writer = std::make_unique<ThreadPool>(
-                CurrentMetrics::IOWriterThreads,
-                CurrentMetrics::IOWriterThreadsActive,
-                CurrentMetrics::IOWriterThreadsScheduled,
-                pool_size,
-                pool_size,
-                queue_size);
-        });
+        shared->threadpool_writer = std::make_unique<ThreadPool>(
+            CurrentMetrics::IOWriterThreads, CurrentMetrics::IOWriterThreadsActive, CurrentMetrics::IOWriterThreadsScheduled, pool_size, pool_size, queue_size);
+    });
 
     return *shared->threadpool_writer;
 }
@@ -5200,7 +4963,10 @@ ReadSettings Context::getReadSettings() const
 
     /// Zero read buffer will not make progress.
     if (!settings.max_read_buffer_size)
-        throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Invalid value '{}' for max_read_buffer_size", settings.max_read_buffer_size);
+    {
+        throw Exception(ErrorCodes::INVALID_SETTING_VALUE,
+            "Invalid value '{}' for max_read_buffer_size", settings.max_read_buffer_size);
+    }
 
     res.local_fs_buffer_size
         = settings.max_read_buffer_size_local_fs ? settings.max_read_buffer_size_local_fs : settings.max_read_buffer_size;
diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.sql b/tests/queries/0_stateless/02931_max_num_to_warn.sql
index 2e357a6080c..f8043e9d2a5 100644
--- a/tests/queries/0_stateless/02931_max_num_to_warn.sql
+++ b/tests/queries/0_stateless/02931_max_num_to_warn.sql
@@ -1,14 +1,15 @@
-CREATE TABLE test_max_num_to_warn_1 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_2 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_3 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_4 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_5 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_6 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_7 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_8 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_9 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_10 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_11 (id Int32, str String) Engine=Memory;
+CREATE DATABASE test_max_num_to_warn_02931;
+CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_1 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_2 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_3 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_4 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_5 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_6 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_7 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_8 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_9 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_10 (id Int32, str String) Engine=Memory;
+CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_11 (id Int32, str String) Engine=Memory;
 
 CREATE DATABASE test_max_num_to_warn_1;
 CREATE DATABASE test_max_num_to_warn_2;
@@ -22,32 +23,21 @@ CREATE DATABASE test_max_num_to_warn_9;
 CREATE DATABASE test_max_num_to_warn_10;
 CREATE DATABASE test_max_num_to_warn_11;
 
-INSERT INTO test_max_num_to_warn_1 VALUES (1, 'Hello');
-INSERT INTO test_max_num_to_warn_2 VALUES (1, 'Hello');
-INSERT INTO test_max_num_to_warn_3 VALUES (1, 'Hello');
-INSERT INTO test_max_num_to_warn_4 VALUES (1, 'Hello');
-INSERT INTO test_max_num_to_warn_5 VALUES (1, 'Hello');
-INSERT INTO test_max_num_to_warn_6 VALUES (1, 'Hello');
-INSERT INTO test_max_num_to_warn_7 VALUES (1, 'Hello');
-INSERT INTO test_max_num_to_warn_8 VALUES (1, 'Hello');
-INSERT INTO test_max_num_to_warn_9 VALUES (1, 'Hello');
-INSERT INTO test_max_num_to_warn_10 VALUES (1, 'Hello');
-INSERT INTO test_max_num_to_warn_11 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_1 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_2 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_3 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_4 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_5 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_6 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_7 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_8 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_9 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_10 VALUES (1, 'Hello');
+INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_11 VALUES (1, 'Hello');
 
 SELECT * FROM system.warnings where message in ('The number of attached tables is more than 10', 'The number of attached databases is more than 10', 'The number of active parts is more than 10');
 
-DROP TABLE test_max_num_to_warn_1;
-DROP TABLE test_max_num_to_warn_2;
-DROP TABLE test_max_num_to_warn_3;
-DROP TABLE test_max_num_to_warn_4;
-DROP TABLE test_max_num_to_warn_5;
-DROP TABLE test_max_num_to_warn_6;
-DROP TABLE test_max_num_to_warn_7;
-DROP TABLE test_max_num_to_warn_8;
-DROP TABLE test_max_num_to_warn_9;
-DROP TABLE test_max_num_to_warn_10;
-DROP TABLE test_max_num_to_warn_11;
-
+DROP DATABASE test_max_num_to_warn_02931;
 DROP DATABASE test_max_num_to_warn_1;
 DROP DATABASE test_max_num_to_warn_2;
 DROP DATABASE test_max_num_to_warn_3;

From ca790b6eecade9f23a7cbbfaf755332182e263ce Mon Sep 17 00:00:00 2001
From: Ryan Jacobs <ryan.jacobs@tophap.com>
Date: Wed, 6 Dec 2023 19:26:50 -0800
Subject: [PATCH 072/213] apply python style check

---
 .../test_storage_postgresql/test.py           | 37 +++++++++++++------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py
index 8e1be600687..a1b13739b5b 100644
--- a/tests/integration/test_storage_postgresql/test.py
+++ b/tests/integration/test_storage_postgresql/test.py
@@ -202,16 +202,19 @@ def test_postgres_conversions(started_cluster):
     cursor.execute(f"DROP TABLE test_types")
     cursor.execute(f"DROP TABLE test_array_dimensions")
 
+
 def test_postgres_array_ndim_error_messges(started_cluster):
     cursor = started_cluster.postgres_conn.cursor()
 
     # cleanup
-    cursor.execute('DROP VIEW  IF EXISTS array_ndim_view;')
-    cursor.execute('DROP TABLE IF EXISTS array_ndim_table;')
+    cursor.execute("DROP VIEW  IF EXISTS array_ndim_view;")
+    cursor.execute("DROP TABLE IF EXISTS array_ndim_table;")
 
     # setup
-    cursor.execute('CREATE TABLE array_ndim_table (x INTEGER, "Mixed-case with spaces" INTEGER[]);')
-    cursor.execute('CREATE VIEW  array_ndim_view AS SELECT * FROM array_ndim_table;')
+    cursor.execute(
+        'CREATE TABLE array_ndim_table (x INTEGER, "Mixed-case with spaces" INTEGER[]);'
+    )
+    cursor.execute("CREATE VIEW  array_ndim_view AS SELECT * FROM array_ndim_table;")
     describe_table = """
     DESCRIBE TABLE postgresql(
         'postgres1:5432', 'postgres', 'array_ndim_view',
@@ -225,30 +228,40 @@ def test_postgres_array_ndim_error_messges(started_cluster):
         node1.query(describe_table)
         assert False
     except Exception as error:
-        assert ('PostgreSQL relation containing arrays cannot be empty: array_ndim_view' in str(error))
+        assert (
+            "PostgreSQL relation containing arrays cannot be empty: array_ndim_view"
+            in str(error)
+        )
 
     # View cannot have empty array. Should throw useful error message.
     # (Cannot infer array dimension.)
-    cursor.execute('TRUNCATE array_ndim_table;')
+    cursor.execute("TRUNCATE array_ndim_table;")
     cursor.execute("INSERT INTO array_ndim_table VALUES (1234, '{}');")
     try:
         node1.query(describe_table)
         assert False
     except Exception as error:
-        assert ('PostgreSQL cannot infer dimensions of an empty array: array_ndim_view."Mixed-case with spaces"' in str(error))
+        assert (
+            'PostgreSQL cannot infer dimensions of an empty array: array_ndim_view."Mixed-case with spaces"'
+            in str(error)
+        )
 
     # View cannot have NULL array value. Should throw useful error message.
-    cursor.execute('TRUNCATE array_ndim_table;')
-    cursor.execute('INSERT INTO array_ndim_table VALUES (1234, NULL);')
+    cursor.execute("TRUNCATE array_ndim_table;")
+    cursor.execute("INSERT INTO array_ndim_table VALUES (1234, NULL);")
     try:
         node1.query(describe_table)
         assert False
     except Exception as error:
-        assert ('PostgreSQL array cannot be NULL: array_ndim_view."Mixed-case with spaces"' in str(error))
+        assert (
+            'PostgreSQL array cannot be NULL: array_ndim_view."Mixed-case with spaces"'
+            in str(error)
+        )
 
     # cleanup
-    cursor.execute('DROP VIEW  IF EXISTS array_ndim_view;')
-    cursor.execute('DROP TABLE IF EXISTS array_ndim_table;')
+    cursor.execute("DROP VIEW  IF EXISTS array_ndim_view;")
+    cursor.execute("DROP TABLE IF EXISTS array_ndim_table;")
+
 
 def test_non_default_schema(started_cluster):
     node1.query("DROP TABLE IF EXISTS test_pg_table_schema")

From 2c6604ec24db0909e7d9273cf5fc3202120e4abc Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 7 Dec 2023 17:39:09 +0100
Subject: [PATCH 073/213] Allow nodes in config with from_env/from_zk and non
 empty element with replace=1

Such nodes in the config are useful as nodes with default values that can
be overridden from ZooKeeper/env.

So after this patch the following is valid: 86400 is interpreted as the
default value, which can be overridden via env:

```xml
<asynchronous_metrics_update_period_s replace="1" from_env="CH_ASYNCHRONOUS_METRICS_UPDATE_PERIOD_S">86400</asynchronous_metrics_update_period_s>
```

While the following is not:

```xml
<asynchronous_metrics_update_period_s             from_env="CH_ASYNCHRONOUS_METRICS_UPDATE_PERIOD_S">86400</asynchronous_metrics_update_period_s>
```

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Common/Config/ConfigProcessor.cpp         | 10 +--
 .../configs/000-config_with_env_subst.xml     |  1 +
 .../test_config_substitutions/test.py         | 68 ++++++++++++++++++-
 3 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp
index c213b7257d9..92e66fee489 100644
--- a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@@ -519,8 +519,9 @@ void ConfigProcessor::doIncludesRecursive(
 
     if (attr_nodes["from_zk"]) /// we have zookeeper subst
     {
-        if (node->hasChildNodes()) /// only allow substitution for nodes with no value
-            throw Poco::Exception("Element <" + node->nodeName() + "> has value, can't process from_zk substitution");
+        /// only allow substitution for nodes with no value and without "replace"
+        if (node->hasChildNodes() && !replace)
+            throw Poco::Exception("Element <" + node->nodeName() + "> has value and does not have 'replace' attribute, can't process from_zk substitution");
 
         contributing_zk_paths.insert(attr_nodes["from_zk"]->getNodeValue());
 
@@ -544,8 +545,9 @@ void ConfigProcessor::doIncludesRecursive(
 
     if (attr_nodes["from_env"]) /// we have env subst
     {
-        if (node->hasChildNodes()) /// only allow substitution for nodes with no value
-            throw Poco::Exception("Element <" + node->nodeName() + "> has value, can't process from_env substitution");
+        /// only allow substitution for nodes with no value and without "replace"
+        if (node->hasChildNodes() && !replace)
+            throw Poco::Exception("Element <" + node->nodeName() + "> has value and does not have 'replace' attribute, can't process from_env substitution");
 
         XMLDocumentPtr env_document;
         auto get_env_node = [&](const std::string & name) -> const Node *
diff --git a/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml b/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml
index ffa26488874..b029dd3bd2e 100644
--- a/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml
+++ b/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml
@@ -2,6 +2,7 @@
   <profiles>
     <default>
         <max_query_size from_env="MAX_QUERY_SIZE" />
+        <max_threads replace="1" from_env="MAX_THREADS">1</max_threads>
     </default>
   </profiles>
   <users>
diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py
index 46961e5da71..564985b2f50 100644
--- a/tests/integration/test_config_substitutions/test.py
+++ b/tests/integration/test_config_substitutions/test.py
@@ -1,6 +1,7 @@
 import pytest
 
 from helpers.cluster import ClickHouseCluster
+from helpers.client import QueryRuntimeException
 
 cluster = ClickHouseCluster(__file__)
 node1 = cluster.add_instance(
@@ -36,9 +37,13 @@ node7 = cluster.add_instance(
         "configs/000-config_with_env_subst.xml",
         "configs/010-env_subst_override.xml",
     ],
-    env_variables={"MAX_QUERY_SIZE": "121212"},
+    env_variables={
+        # overridden with 424242
+        "MAX_QUERY_SIZE": "121212",
+        "MAX_THREADS": "2",
+    },
     instance_env_variables=True,
-)  # overridden with 424242
+)
 
 
 @pytest.fixture(scope="module")
@@ -91,6 +96,65 @@ def test_config(start_cluster):
         node7.query("select value from system.settings where name = 'max_query_size'")
         == "424242\n"
     )
+    assert (
+        node7.query("select value from system.settings where name = 'max_threads'")
+        == "2\n"
+    )
+
+
+def test_config_invalid_overrides(start_cluster):
+    node7.replace_config(
+        "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml",
+        """
+<clickhouse>
+  <profiles>
+    <default>
+        <max_query_size from_env="MAX_QUERY_SIZE" />
+        <max_threads from_env="MAX_THREADS">100</max_threads>
+    </default>
+  </profiles>
+  <users>
+      <default>
+          <password></password>
+          <profile>default</profile>
+          <quota>default</quota>
+      </default>
+
+      <include incl="users_1" />
+      <include incl="users_2" />
+  </users>
+</clickhouse>
+""",
+    )
+    with pytest.raises(
+        QueryRuntimeException,
+        match="Failed to preprocess config '/etc/clickhouse-server/users.xml': Exception: Element <max_threads> has value and does not have 'replace' attribute, can't process from_env substitution",
+    ):
+        node7.query("SYSTEM RELOAD CONFIG")
+    node7.replace_config(
+        "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml",
+        """
+<clickhouse>
+  <profiles>
+    <default>
+        <max_query_size from_env="MAX_QUERY_SIZE" />
+        <max_threads replace="1" from_env="MAX_THREADS">1</max_threads>
+    </default>
+  </profiles>
+  <users>
+      <default>
+          <password></password>
+          <profile>default</profile>
+          <quota>default</quota>
+      </default>
+
+      <include incl="users_1" />
+      <include incl="users_2" />
+  </users>
+</clickhouse>
+""",
+    )
+    node7.query("SYSTEM RELOAD CONFIG")
 
 
 def test_include_config(start_cluster):

From 7205e4edbeee134dde63451883c21c2f67cf5e01 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Thu, 7 Dec 2023 18:47:14 +0100
Subject: [PATCH 074/213] Review fix

---
 src/Interpreters/Cache/FileCache.cpp          | 21 +++++++++++++++++++
 src/Interpreters/Cache/FileCache.h            | 21 +------------------
 .../Cache/SLRUFileCachePriority.cpp           |  2 +-
 3 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index f1b3d24dbc1..29f2467bd12 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -51,6 +51,27 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }
 
+void FileCacheReserveStat::update(size_t size, FileSegmentKind kind, bool releasable)
+{
+    auto & local_stat = stat_by_kind[kind];
+    if (releasable)
+    {
+        stat.releasable_size += size;
+        ++stat.releasable_count;
+
+        local_stat.releasable_size += size;
+        ++local_stat.releasable_count;
+    }
+    else
+    {
+        stat.non_releasable_size += size;
+        ++stat.non_releasable_count;
+
+        local_stat.non_releasable_size += size;
+        ++local_stat.non_releasable_count;
+    }
+}
+
 FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & settings)
     : max_file_segment_size(settings.max_file_segment_size)
     , bypass_cache_threshold(settings.enable_bypass_cache_with_threshold ? settings.bypass_cache_threshold : 0)
diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h
index 097a63b0abe..9cd2232bf7e 100644
--- a/src/Interpreters/Cache/FileCache.h
+++ b/src/Interpreters/Cache/FileCache.h
@@ -38,26 +38,7 @@ struct FileCacheReserveStat
     Stat stat;
     std::unordered_map<FileSegmentKind, Stat> stat_by_kind;
 
-    void update(size_t size, FileSegmentKind kind, bool releasable)
-    {
-        auto & local_stat = stat_by_kind[kind];
-        if (releasable)
-        {
-            stat.releasable_size += size;
-            ++stat.releasable_count;
-
-            local_stat.releasable_size += size;
-            ++local_stat.releasable_count;
-        }
-        else
-        {
-            stat.non_releasable_size += size;
-            ++stat.non_releasable_count;
-
-            local_stat.non_releasable_size += size;
-            ++local_stat.non_releasable_count;
-        }
-    }
+    void update(size_t size, FileSegmentKind kind, bool releasable);
 };
 
 /// Local cache for remote filesystem files, represented as a set of non-overlapping non-empty file segments.
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index 8f7f5e4aa32..75987bccf6a 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -14,7 +14,7 @@ namespace
 {
     size_t getRatio(size_t total, double ratio)
     {
-        return static_cast<size_t>(total * std::max(0.0, std::min(1.0, ratio)));
+        return static_cast<size_t>(total * std::clamp(ratio, 0.0, 1.0));
     }
 }
 

From ff65d0e72778c66947889f5675d61199845906a1 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Thu, 7 Dec 2023 19:49:30 +0100
Subject: [PATCH 075/213] Adjustments after merge with master

---
 src/Interpreters/Cache/FileCache.cpp          |  4 +-
 src/Interpreters/Cache/FileCache.h            |  2 +-
 src/Interpreters/Cache/FileSegment.cpp        |  2 +-
 src/Interpreters/Cache/FileSegment.h          | 72 +-----------------
 src/Interpreters/Cache/FileSegmentInfo.h      | 73 +++++++++++++++++++
 src/Interpreters/Cache/IFileCachePriority.h   | 11 ++-
 .../Cache/LRUFileCachePriority.cpp            |  6 +-
 src/Interpreters/Cache/LRUFileCachePriority.h |  2 +-
 .../Cache/SLRUFileCachePriority.cpp           | 12 ++-
 .../Cache/SLRUFileCachePriority.h             |  2 +-
 src/Interpreters/Cache/test                   |  6 ++
 src/Interpreters/tests/gtest_filecache.cpp    | 57 +++++++++++----
 12 files changed, 152 insertions(+), 97 deletions(-)
 create mode 100644 src/Interpreters/Cache/FileSegmentInfo.h
 create mode 100644 src/Interpreters/Cache/test

diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index e3d0e1585b2..1ca654f7b66 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -1173,10 +1173,10 @@ std::vector<FileSegment::Info> FileCache::getFileSegmentInfos(const Key & key)
     return file_segments;
 }
 
-std::vector<FileSegment::Info> FileCache::dumpQueue()
+IFileCachePriority::QueueEntriesDumps FileCache::dumpQueue()
 {
     assertInitialized();
-    return main_priority->dump(lockCache());
+    return main_priority->dump(*this, lockCache());
 }
 
 std::vector<String> FileCache::tryGetCachePaths(const Key & key)
diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h
index 0398383f662..02979b72b98 100644
--- a/src/Interpreters/Cache/FileCache.h
+++ b/src/Interpreters/Cache/FileCache.h
@@ -126,7 +126,7 @@ public:
 
     std::vector<FileSegment::Info> getFileSegmentInfos(const Key & key);
 
-    std::vector<FileSegment::Info> dumpQueue();
+    IFileCachePriority::QueueEntriesDumps dumpQueue();
 
     void deactivateBackgroundOperations();
 
diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp
index b9399595304..8d2776258ae 100644
--- a/src/Interpreters/Cache/FileSegment.cpp
+++ b/src/Interpreters/Cache/FileSegment.cpp
@@ -480,7 +480,7 @@ bool FileSegment::reserve(size_t size_to_reserve, FileCacheReserveStat * reserve
 
     bool is_file_segment_size_exceeded;
     {
-        auto lock = segment_guard.lock();
+        auto lock = lockFileSegment();
 
         assertNotDetachedUnlocked(lock);
         assertIsDownloaderUnlocked("reserve", lock);
diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h
index bdeec44237c..1183878fa1c 100644
--- a/src/Interpreters/Cache/FileSegment.h
+++ b/src/Interpreters/Cache/FileSegment.h
@@ -11,8 +11,8 @@
 #include <IO/OpenedFileCache.h>
 #include <base/getThreadId.h>
 #include <Interpreters/Cache/IFileCachePriority.h>
+#include <Interpreters/Cache/FileSegmentInfo.h>
 #include <Interpreters/Cache/FileCache_fwd_internal.h>
-#include <queue>
 
 
 namespace Poco { class Logger; }
@@ -28,23 +28,6 @@ namespace DB
 class ReadBufferFromFileBase;
 struct FileCacheReserveStat;
 
-/*
- * FileSegmentKind is used to specify the eviction policy for file segments.
- */
-enum class FileSegmentKind
-{
-    /* `Regular` file segment is still in cache after usage, and can be evicted
-     * (unless there're some holders).
-     */
-    Regular,
-
-    /* `Temporary` file segment is removed right after releasing.
-     * Also corresponding files are removed during cache loading (if any).
-     */
-    Temporary,
-};
-
-String toString(FileSegmentKind kind);
 
 struct CreateFileSegmentSettings
 {
@@ -69,40 +52,8 @@ public:
     using Downloader = std::string;
     using DownloaderId = std::string;
     using Priority = IFileCachePriority;
-
-    enum class State
-    {
-        DOWNLOADED,
-        /**
-         * When file segment is first created and returned to user, it has state EMPTY.
-         * EMPTY state can become DOWNLOADING when getOrSetDownaloder is called successfully
-         * by any owner of EMPTY state file segment.
-         */
-        EMPTY,
-        /**
-         * A newly created file segment never has DOWNLOADING state until call to getOrSetDownloader
-         * because each cache user might acquire multiple file segments and read them one by one,
-         * so only user which actually needs to read this segment earlier than others - becomes a downloader.
-         */
-        DOWNLOADING,
-        /**
-         * Space reservation for a file segment is incremental, i.e. downloader reads buffer_size bytes
-         * from remote fs -> tries to reserve buffer_size bytes to put them to cache -> writes to cache
-         * on successful reservation and stops cache write otherwise. Those, who waited for the same file
-         * segment, will read downloaded part from cache and remaining part directly from remote fs.
-         */
-        PARTIALLY_DOWNLOADED_NO_CONTINUATION,
-        /**
-         * If downloader did not finish download of current file segment for any reason apart from running
-         * out of cache space, then download can be continued by other owners of this file segment.
-         */
-        PARTIALLY_DOWNLOADED,
-        /**
-         * If file segment cannot possibly be downloaded (first space reservation attempt failed), mark
-         * this file segment as out of cache scope.
-         */
-        DETACHED,
-    };
+    using State = FileSegmentState;
+    using Info = FileSegmentInfo;
 
     FileSegment(
         const Key & key_,
@@ -205,22 +156,7 @@ public:
     /// exception.
     void detach(const FileSegmentGuard::Lock &, const LockedKey &);
 
-    struct Info
-    {
-        FileSegment::Key key;
-        size_t offset;
-        std::string path;
-        uint64_t range_left;
-        uint64_t range_right;
-        FileSegmentKind kind;
-        State state;
-        uint64_t size;
-        uint64_t downloaded_size;
-        uint64_t cache_hits;
-        uint64_t references;
-        bool is_unbound;
-    };
-    static Info getInfo(const FileSegmentPtr & file_segment, FileCache & cache);
+    static FileSegmentInfo getInfo(const FileSegmentPtr & file_segment, FileCache & cache);
 
     bool isDetached() const;
 
diff --git a/src/Interpreters/Cache/FileSegmentInfo.h b/src/Interpreters/Cache/FileSegmentInfo.h
new file mode 100644
index 00000000000..c1a38e28b1d
--- /dev/null
+++ b/src/Interpreters/Cache/FileSegmentInfo.h
@@ -0,0 +1,73 @@
+#pragma once
+#include <Interpreters/Cache/FileCache_fwd.h>
+#include <Interpreters/Cache/FileCacheKey.h>
+
+namespace DB
+{
+    enum class FileSegmentState
+    {
+        DOWNLOADED,
+        /**
+         * When file segment is first created and returned to user, it has state EMPTY.
+         * EMPTY state can become DOWNLOADING when getOrSetDownloader is called successfully
+         * by any owner of EMPTY state file segment.
+         */
+        EMPTY,
+        /**
+         * A newly created file segment never has DOWNLOADING state until call to getOrSetDownloader
+         * because each cache user might acquire multiple file segments and read them one by one,
+         * so only user which actually needs to read this segment earlier than others - becomes a downloader.
+         */
+        DOWNLOADING,
+        /**
+         * Space reservation for a file segment is incremental, i.e. downloader reads buffer_size bytes
+         * from remote fs -> tries to reserve buffer_size bytes to put them to cache -> writes to cache
+         * on successful reservation and stops cache write otherwise. Those, who waited for the same file
+         * segment, will read downloaded part from cache and remaining part directly from remote fs.
+         */
+        PARTIALLY_DOWNLOADED_NO_CONTINUATION,
+        /**
+         * If downloader did not finish download of current file segment for any reason apart from running
+         * out of cache space, then download can be continued by other owners of this file segment.
+         */
+        PARTIALLY_DOWNLOADED,
+        /**
+         * If file segment cannot possibly be downloaded (first space reservation attempt failed), mark
+         * this file segment as out of cache scope.
+         */
+        DETACHED,
+    };
+
+    enum class FileSegmentKind
+    {
+        /**
+         * `Regular` file segment is still in cache after usage, and can be evicted
+         * (unless there're some holders).
+         */
+        Regular,
+
+        /**
+         * `Temporary` file segment is removed right after releasing.
+         * Also corresponding files are removed during cache loading (if any).
+         */
+        Temporary,
+    };
+
+    std::string toString(FileSegmentKind kind);
+
+    struct FileSegmentInfo
+    {
+        FileCacheKey key;
+        size_t offset;
+        std::string path;
+        uint64_t range_left;
+        uint64_t range_right;
+        FileSegmentKind kind;
+        FileSegmentState state;
+        uint64_t size;
+        uint64_t downloaded_size;
+        uint64_t cache_hits;
+        uint64_t references;
+        bool is_unbound;
+    };
+}
diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h
index e7a05b0e46d..dfc57328242 100644
--- a/src/Interpreters/Cache/IFileCachePriority.h
+++ b/src/Interpreters/Cache/IFileCachePriority.h
@@ -3,8 +3,9 @@
 #include <memory>
 #include <Core/Types.h>
 #include <Common/Exception.h>
-#include <Interpreters/Cache/FileCacheKey.h>
+#include <Interpreters/Cache/FileSegmentInfo.h>
 #include <Interpreters/Cache/Guards.h>
+#include <Interpreters/Cache/IFileCachePriority.h>
 #include <Interpreters/Cache/FileCache_fwd_internal.h>
 
 namespace DB
@@ -71,7 +72,13 @@ public:
 
     virtual void shuffle(const CacheGuard::Lock &) = 0;
 
-    virtual FileSegments dump(const CacheGuard::Lock &) = 0;
+    struct QueueEntryDump
+    {
+        FileSegmentInfo info;
+        bool is_protected = false;
+    };
+    using QueueEntriesDumps = std::vector<QueueEntryDump>;
+    virtual QueueEntriesDumps dump(FileCache & cache, const CacheGuard::Lock &) = 0;
 
     using FinalizeEvictionFunc = std::function<void(const CacheGuard::Lock & lk)>;
     virtual bool collectCandidatesForEviction(
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index a596d041941..ea0ce168913 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -277,12 +277,12 @@ LRUFileCachePriority::LRUIterator LRUFileCachePriority::move(LRUIterator & it, L
     return LRUIterator(this, it.iterator);
 }
 
-FileSegments LRUFileCachePriority::dump(const CacheGuard::Lock & lock)
+IFileCachePriority::QueueEntriesDumps LRUFileCachePriority::dump(FileCache & cache, const CacheGuard::Lock & lock)
 {
-    FileSegments res;
+    QueueEntriesDumps res;
     iterate([&](LockedKey &, const FileSegmentMetadataPtr & segment_metadata)
     {
-        res.push_back(FileSegment::getSnapshot(segment_metadata->file_segment));
+        res.emplace_back(FileSegment::getInfo(segment_metadata->file_segment, cache));
         return IterationResult::CONTINUE;
     }, lock);
     return res;
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index 31a5c527ca8..a9d823313d9 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -44,7 +44,7 @@ public:
 
     void shuffle(const CacheGuard::Lock &) override;
 
-    FileSegments dump(const CacheGuard::Lock &) override;
+    QueueEntriesDumps dump(FileCache & cache, const CacheGuard::Lock &) override;
 
     void pop(const CacheGuard::Lock & lock) { remove(queue.begin(), lock); }
 
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index 75987bccf6a..88d90e4dede 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -222,10 +222,16 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach
     iterator.is_protected = true;
 }
 
-FileSegments SLRUFileCachePriority::dump(const CacheGuard::Lock & lock)
+IFileCachePriority::QueueEntriesDumps SLRUFileCachePriority::dump(FileCache & cache, const CacheGuard::Lock & lock)
 {
-    auto res = probationary_queue.dump(lock);
-    auto part_res = protected_queue.dump(lock);
+    auto res = probationary_queue.dump(cache, lock);
+    for (auto & entry : res)
+        entry.is_protected = false;
+
+    auto part_res = protected_queue.dump(cache, lock);
+    for (auto & entry : part_res)
+        entry.is_protected = true;
+
     res.insert(res.end(), part_res.begin(), part_res.end());
     return res;
 }
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
index 7753f6f23b2..b08fce50f00 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.h
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -43,7 +43,7 @@ public:
 
     void shuffle(const CacheGuard::Lock &) override;
 
-    FileSegments dump(const CacheGuard::Lock &) override;
+    QueueEntriesDumps dump(FileCache & cache, const CacheGuard::Lock &) override;
 
 private:
     LRUFileCachePriority protected_queue;
diff --git a/src/Interpreters/Cache/test b/src/Interpreters/Cache/test
new file mode 100644
index 00000000000..5b1f397194b
--- /dev/null
+++ b/src/Interpreters/Cache/test
@@ -0,0 +1,6 @@
+CREATE DICTIONARY dict (`id` String, `timestamp` DateTime)
+PRIMARY KEY id SOURCE(CLICKHOUSE(QUERY 'SELECT \'test\' as id, now() as timestamp')) LAYOUT(DIRECT());
+
+CREATE table t (id LowCardinality(String)) engine = MergeTree() ORDER BY id;
+
+CREATE VIEW v AS select dictGet(dict, 'timestamp', id) from t;
diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp
index 15a7598bfd5..3bf7d1b9a14 100644
--- a/src/Interpreters/tests/gtest_filecache.cpp
+++ b/src/Interpreters/tests/gtest_filecache.cpp
@@ -142,22 +142,49 @@ void assertEqual(const std::vector<FileSegment::Info> & file_segments, const Ran
     }
 }
 
-void assertProtectedOrProbationary(const FileSegments & file_segments, const Ranges & expected, bool assert_protected)
+void assertEqual(const IFileCachePriority::QueueEntriesDumps & file_segments, const Ranges & expected_ranges, const States & expected_states = {})
+{
+    std::cerr << "File segments: ";
+    for (const auto & f : file_segments)
+    {
+        auto range = FileSegment::Range(f.info.range_left, f.info.range_right);
+        std::cerr << range.toString() << ", ";
+    }
+
+    ASSERT_EQ(file_segments.size(), expected_ranges.size());
+
+    if (!expected_states.empty())
+        ASSERT_EQ(file_segments.size(), expected_states.size());
+
+    auto get_expected_state = [&](size_t i)
+    {
+        if (expected_states.empty())
+            return State::DOWNLOADED;
+        else
+            return expected_states[i];
+    };
+
+    size_t i = 0;
+    for (const auto & f : file_segments)
+    {
+        auto range = FileSegment::Range(f.info.range_left, f.info.range_right);
+        ASSERT_EQ(range, expected_ranges[i]);
+        ASSERT_EQ(f.info.state, get_expected_state(i));
+        ++i;
+    }
+}
+
+void assertProtectedOrProbationary(const IFileCachePriority::QueueEntriesDumps & file_segments, const Ranges & expected, bool assert_protected)
 {
     std::cerr << "File segments: ";
     std::vector<Range> res;
     for (const auto & f : file_segments)
     {
-        std::cerr << f->range().toString() << ", ";
-        if (auto it = f->getQueueIterator())
+        auto range = FileSegment::Range(f.info.range_left, f.info.range_right);
+        std::cerr << range.toString() << ", ";
+        if ((f.is_protected && assert_protected) || (!f.is_protected && !assert_protected))
         {
-            if (auto * slru_it = dynamic_cast<SLRUFileCachePriority::SLRUIterator *>(it.get()))
-            {
-                if ((slru_it->isProtected() && assert_protected) || (!slru_it->isProtected() && !assert_protected))
-                {
-                    res.push_back(f->range());
-                }
-            }
+            res.push_back(range);
         }
     }
 
@@ -168,12 +195,12 @@ void assertProtectedOrProbationary(const FileSegments & file_segments, const Ran
     }
 }
 
-void assertProtected(const FileSegments & file_segments, const Ranges & expected)
+void assertProtected(const IFileCachePriority::QueueEntriesDumps & file_segments, const Ranges & expected)
 {
     assertProtectedOrProbationary(file_segments, expected, true);
 }
 
-void assertProbationary(const FileSegments & file_segments, const Ranges & expected)
+void assertProbationary(const IFileCachePriority::QueueEntriesDumps & file_segments, const Ranges & expected)
 {
     assertProtectedOrProbationary(file_segments, expected, false);
 }
@@ -1151,7 +1178,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
         add_range(0, 10);
         add_range(10, 5);
 
-        assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14) });
+        assertEqual(cache.getFileSegmentInfos(key), { Range(0, 9), Range(10, 14) });
         assertEqual(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
 
         ASSERT_EQ(cache.getFileSegmentsNum(), 2);
@@ -1181,7 +1208,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
         assertProbationary(cache.dumpQueue(), { Range(17, 20), Range(24, 26), Range(27, 27) });
         assertProtected(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
 
-        assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14), Range(17, 20), Range(24, 26), Range(27, 27) });
+        assertEqual(cache.getFileSegmentInfos(key), { Range(0, 9), Range(10, 14), Range(17, 20), Range(24, 26), Range(27, 27) });
         ASSERT_EQ(cache.getFileSegmentsNum(), 5);
         ASSERT_EQ(cache.getUsedCacheSize(), 23);
 
@@ -1201,7 +1228,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
         assertProbationary(cache.dumpQueue(), { Range(24, 26), Range(10, 14) });
         assertProtected(cache.dumpQueue(), { Range(0, 9), Range(27, 27), Range(28, 30) });
 
-        assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14), Range(24, 26), Range(27, 27), Range(28, 30) });
+        assertEqual(cache.getFileSegmentInfos(key), { Range(0, 9), Range(10, 14), Range(24, 26), Range(27, 27), Range(28, 30) });
         ASSERT_EQ(cache.getFileSegmentsNum(), 5);
         ASSERT_EQ(cache.getUsedCacheSize(), 22);
     }

From 4616ecb53652495111db72479463ced149f280ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 8 Dec 2023 10:11:53 +0800
Subject: [PATCH 076/213] fix tests

---
 .../0_stateless/02931_max_num_to_warn.sql     | 70 +++++++++----------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.sql b/tests/queries/0_stateless/02931_max_num_to_warn.sql
index f8043e9d2a5..dfb6e4ceac5 100644
--- a/tests/queries/0_stateless/02931_max_num_to_warn.sql
+++ b/tests/queries/0_stateless/02931_max_num_to_warn.sql
@@ -1,27 +1,27 @@
-CREATE DATABASE test_max_num_to_warn_02931;
-CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_1 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_2 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_3 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_4 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_5 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_6 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_7 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_8 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_9 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_10 (id Int32, str String) Engine=Memory;
-CREATE TABLE test_max_num_to_warn_02931.test_max_num_to_warn_11 (id Int32, str String) Engine=Memory;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_02931;
+CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_1 (id Int32, str String) Engine=Memory;
+CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_2 (id Int32, str String) Engine=Memory;
+CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_3 (id Int32, str String) Engine=Memory;
+CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_4 (id Int32, str String) Engine=Memory;
+CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_5 (id Int32, str String) Engine=Memory;
+CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_6 (id Int32, str String) Engine=Memory;
+CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_7 (id Int32, str String) Engine=Memory;
+CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_8 (id Int32, str String) Engine=Memory;
+CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_9 (id Int32, str String) Engine=Memory;
+CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_10 (id Int32, str String) Engine=Memory;
+CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_11 (id Int32, str String) Engine=Memory;
 
-CREATE DATABASE test_max_num_to_warn_1;
-CREATE DATABASE test_max_num_to_warn_2;
-CREATE DATABASE test_max_num_to_warn_3;
-CREATE DATABASE test_max_num_to_warn_4;
-CREATE DATABASE test_max_num_to_warn_5;
-CREATE DATABASE test_max_num_to_warn_6;
-CREATE DATABASE test_max_num_to_warn_7;
-CREATE DATABASE test_max_num_to_warn_8;
-CREATE DATABASE test_max_num_to_warn_9;
-CREATE DATABASE test_max_num_to_warn_10;
-CREATE DATABASE test_max_num_to_warn_11;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_1;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_2;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_3;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_4;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_5;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_6;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_7;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_8;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_9;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_10;
+CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_11;
 
 INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_1 VALUES (1, 'Hello');
 INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_2 VALUES (1, 'Hello');
@@ -37,15 +37,15 @@ INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_11 VALUES (1, 'Hello
 
 SELECT * FROM system.warnings where message in ('The number of attached tables is more than 10', 'The number of attached databases is more than 10', 'The number of active parts is more than 10');
 
-DROP DATABASE test_max_num_to_warn_02931;
-DROP DATABASE test_max_num_to_warn_1;
-DROP DATABASE test_max_num_to_warn_2;
-DROP DATABASE test_max_num_to_warn_3;
-DROP DATABASE test_max_num_to_warn_4;
-DROP DATABASE test_max_num_to_warn_5;
-DROP DATABASE test_max_num_to_warn_6;
-DROP DATABASE test_max_num_to_warn_7;
-DROP DATABASE test_max_num_to_warn_8;
-DROP DATABASE test_max_num_to_warn_9;
-DROP DATABASE test_max_num_to_warn_10;
-DROP DATABASE test_max_num_to_warn_11;
+DROP DATABASE IF EXISTS test_max_num_to_warn_02931;
+DROP DATABASE IF EXISTS test_max_num_to_warn_1;
+DROP DATABASE IF EXISTS test_max_num_to_warn_2;
+DROP DATABASE IF EXISTS test_max_num_to_warn_3;
+DROP DATABASE IF EXISTS test_max_num_to_warn_4;
+DROP DATABASE IF EXISTS test_max_num_to_warn_5;
+DROP DATABASE IF EXISTS test_max_num_to_warn_6;
+DROP DATABASE IF EXISTS test_max_num_to_warn_7;
+DROP DATABASE IF EXISTS test_max_num_to_warn_8;
+DROP DATABASE IF EXISTS test_max_num_to_warn_9;
+DROP DATABASE IF EXISTS test_max_num_to_warn_10;
+DROP DATABASE IF EXISTS test_max_num_to_warn_11;

From f2ef3ed5c9750fd8661455d7b9fd31370df038ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 8 Dec 2023 14:22:33 +0800
Subject: [PATCH 077/213] fix tests

---
 tests/queries/0_stateless/02931_max_num_to_warn.sql | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.sql b/tests/queries/0_stateless/02931_max_num_to_warn.sql
index dfb6e4ceac5..49b981fc355 100644
--- a/tests/queries/0_stateless/02931_max_num_to_warn.sql
+++ b/tests/queries/0_stateless/02931_max_num_to_warn.sql
@@ -1,3 +1,5 @@
+-- Tags: no-parallel
+
 CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_02931;
 CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_1 (id Int32, str String) Engine=Memory;
 CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_2 (id Int32, str String) Engine=Memory;

From d77938c3ff86175a6e10025aea72452bc40a5b90 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Fri, 8 Dec 2023 12:42:38 +0100
Subject: [PATCH 078/213] Add validation of columns

---
 .../MaterializedPostgreSQLConsumer.cpp        | 65 +++++++++++++++++--
 .../MaterializedPostgreSQLConsumer.h          |  1 +
 2 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
index 6be1563d16c..d369b8f3788 100644
--- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
+++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
@@ -79,6 +79,7 @@ MaterializedPostgreSQLConsumer::StorageData::StorageData(const StorageInfo & sto
     : storage(storage_info.storage)
     , table_description(storage_info.storage->getInMemoryMetadataPtr()->getSampleBlock())
     , columns_attributes(storage_info.attributes)
+    , column_names(storage_info.storage->getInMemoryMetadataPtr()->getColumns().getNamesOfPhysical())
     , array_info(createArrayInfos(storage_info.storage->getInMemoryMetadataPtr()->getColumns().getAllPhysical(), table_description))
 {
     auto columns_num = table_description.sample_block.columns();
@@ -548,34 +549,88 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
                 return;
             }
 
-            Int16 num_columns = readInt16(replication_message, pos, size);
+            auto log_table_structure_changed = [&](std::string_view reason)
+            {
+                LOG_INFO(log, "Table structure of the table {} changed ({}), "
+                         "will mark it as skipped from replication. "
+                         "Please perform manual DETACH and ATTACH of the table to bring it back",
+                         table_name, reason);
+            };
 
-            Int32 data_type_id;
-            Int32 type_modifier; /// For example, n in varchar(n)
+            Int16 num_columns = readInt16(replication_message, pos, size);
 
             auto & storage_data = storage_iter->second;
             const auto & description = storage_data.table_description;
 
+            const size_t actual_columns_num = storage_data.getColumnsNum();
+            if (size_t(num_columns) > actual_columns_num - 2)
+            {
+                log_table_structure_changed(fmt::format("received {} columns, expected {}", num_columns, actual_columns_num - 2));
+                markTableAsSkipped(relation_id, table_name);
+                return;
+            }
+
+            Int32 data_type_id;
+            Int32 type_modifier; /// For example, n in varchar(n)
+
+            std::set<std::string> all_columns(storage_data.column_names.begin(), storage_data.column_names.end());
+            std::set<std::string> received_columns;
             ColumnsWithTypeAndName columns;
+
             for (uint16_t i = 0; i < num_columns; ++i)
             {
                 String column_name;
                 readInt8(replication_message, pos, size); /// Marks column as part of replica identity index
                 readString(replication_message, pos, size, column_name);
 
+                if (!all_columns.contains(column_name))
+                {
+                    log_table_structure_changed(fmt::format("column {} is not known", column_name));
+                    markTableAsSkipped(relation_id, table_name);
+                    return;
+                }
+
                 data_type_id = readInt32(replication_message, pos, size);
                 type_modifier = readInt32(replication_message, pos, size);
 
                 columns.push_back(description.sample_block.getByName(column_name));
+                received_columns.emplace(column_name);
 
                 const auto & attributes_it = storage_data.columns_attributes.find(column_name);
                 if (attributes_it == storage_data.columns_attributes.end())
-                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column: {}", column_name);
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "No column {} in attributes", column_name);
 
                 const auto & attributes = attributes_it->second;
                 if (attributes.atttypid != data_type_id || attributes.atttypmod != type_modifier)
                 {
-                    LOG_TEST(log, "Column {} has a different type", column_name);
+                    log_table_structure_changed(fmt::format("column {} has a different type", column_name));
+                    markTableAsSkipped(relation_id, table_name);
+                    return;
+                }
+            }
+
+
+            if (size_t(num_columns) < actual_columns_num)
+            {
+                std::vector<std::string> absent_columns;
+                std::set_difference(
+                    all_columns.begin(), all_columns.end(),
+                    received_columns.begin(), received_columns.end(), std::back_inserter(absent_columns));
+
+                for (const auto & name : absent_columns)
+                {
+                    if (name == "_sign" || name == "_version")
+                        continue;
+
+                    const auto & attributes_it = storage_data.columns_attributes.find(name);
+                    if (attributes_it == storage_data.columns_attributes.end())
+                        throw Exception(ErrorCodes::LOGICAL_ERROR, "No column {} in attributes", name);
+
+                    /// Column has a default value or it is a GENERATED column.
+                    if (!attributes_it->second.attr_def.empty())
+                        continue;
+
+                    log_table_structure_changed(fmt::format("column {} was not found", name));
                     markTableAsSkipped(relation_id, table_name);
                     return;
                 }
diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
index d29236b8123..1614cb4bdbc 100644
--- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
+++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
@@ -43,6 +43,7 @@ private:
         const StoragePtr storage;
         const ExternalResultDescription table_description;
         const PostgreSQLTableStructure::Attributes columns_attributes;
+        const Names column_names;
         const ArrayInfo array_info;
 
         struct Buffer

From 0c40465fec34d4e3af6560332f7e171d3b1b666b Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Fri, 8 Dec 2023 12:46:59 +0100
Subject: [PATCH 079/213] Test

---
 .../test.py                                   | 94 +++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py
index 965cca54a23..ddfb5608336 100644
--- a/tests/integration/test_postgresql_replica_database_engine_2/test.py
+++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py
@@ -915,6 +915,100 @@ def test_failed_load_from_snapshot(started_cluster):
     )
 
 
+def test_generated_columns(started_cluster):
+    table = "test_generated_columns"
+
+    pg_manager.create_postgres_table(
+        table,
+        "",
+        f"""CREATE TABLE {table} (
+             key integer PRIMARY KEY,
+             x integer,
+             y integer GENERATED ALWAYS AS (x*2) STORED,
+             z text);
+         """,
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z) values (1,1,'1');")
+    pg_manager.execute(f"insert into {table} (key, x, z) values (2,2,'2');")
+
+    pg_manager.create_materialized_db(
+        ip=started_cluster.postgres_ip,
+        port=started_cluster.postgres_port,
+        settings=[
+            f"materialized_postgresql_tables_list = '{table}'",
+            "materialized_postgresql_backoff_min_ms = 100",
+            "materialized_postgresql_backoff_max_ms = 100",
+        ],
+    )
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z) values (3,3,'3');")
+    pg_manager.execute(f"insert into {table} (key, x, z) values (4,4,'4');")
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z) values (5,5,'5');")
+    pg_manager.execute(f"insert into {table} (key, x, z) values (6,6,'6');")
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+
+def test_default_columns(started_cluster):
+    table = "test_default_columns"
+
+    pg_manager.create_postgres_table(
+        table,
+        "",
+        f"""CREATE TABLE {table} (
+             key integer PRIMARY KEY,
+             x integer,
+             y text DEFAULT 'y1',
+             z integer,
+             a text DEFAULT 'a1',
+             b integer);
+         """,
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (1,1,1,1);")
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (2,2,2,2);")
+
+    pg_manager.create_materialized_db(
+        ip=started_cluster.postgres_ip,
+        port=started_cluster.postgres_port,
+        settings=[
+            f"materialized_postgresql_tables_list = '{table}'",
+            "materialized_postgresql_backoff_min_ms = 100",
+            "materialized_postgresql_backoff_max_ms = 100",
+        ],
+    )
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (3,3,3,3);")
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (4,4,4,4);")
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (5,5,5,5);")
+    pg_manager.execute(f"insert into {table} (key, x, z, b) values (6,6,6,6);")
+
+    check_tables_are_synchronized(
+        instance, table, postgres_database=pg_manager.get_default_database()
+    )
+
+
 if __name__ == "__main__":
     cluster.start()
     input("Cluster created, press any key to destroy...")

From ea63819c76b5884f63573a408720a1a862999759 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Fri, 8 Dec 2023 15:21:42 +0100
Subject: [PATCH 080/213] Better

---
 src/Interpreters/Cache/FileCache.cpp          |   2 +-
 src/Interpreters/Cache/FileCache.h            |   2 +-
 src/Interpreters/Cache/FileSegment.cpp        |   5 +-
 src/Interpreters/Cache/FileSegment.h          |   1 +
 src/Interpreters/Cache/FileSegmentInfo.h      |   9 +
 src/Interpreters/Cache/IFileCachePriority.h   |  11 +-
 .../Cache/LRUFileCachePriority.cpp            |   4 +-
 src/Interpreters/Cache/LRUFileCachePriority.h |   4 +-
 .../Cache/SLRUFileCachePriority.cpp           |   8 +-
 .../Cache/SLRUFileCachePriority.h             |   4 +-
 src/Interpreters/Cache/test                   |   6 -
 src/Interpreters/tests/gtest_filecache.cpp    | 167 +++++++++++++-----
 12 files changed, 151 insertions(+), 72 deletions(-)
 delete mode 100644 src/Interpreters/Cache/test

diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index 1ca654f7b66..23768102616 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -1173,7 +1173,7 @@ std::vector<FileSegment::Info> FileCache::getFileSegmentInfos(const Key & key)
     return file_segments;
 }
 
-IFileCachePriority::QueueEntriesDumps FileCache::dumpQueue()
+std::vector<FileSegment::Info> FileCache::dumpQueue()
 {
     assertInitialized();
     return main_priority->dump(*this, lockCache());
diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h
index 02979b72b98..0398383f662 100644
--- a/src/Interpreters/Cache/FileCache.h
+++ b/src/Interpreters/Cache/FileCache.h
@@ -126,7 +126,7 @@ public:
 
     std::vector<FileSegment::Info> getFileSegmentInfos(const Key & key);
 
-    IFileCachePriority::QueueEntriesDumps dumpQueue();
+    std::vector<FileSegment::Info> dumpQueue();
 
     void deactivateBackgroundOperations();
 
diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp
index 8d2776258ae..9d4c79c96fb 100644
--- a/src/Interpreters/Cache/FileSegment.cpp
+++ b/src/Interpreters/Cache/FileSegment.cpp
@@ -849,6 +849,7 @@ FileSegment::Info FileSegment::getInfo(const FileSegmentPtr & file_segment, File
         .cache_hits = file_segment->hits_count,
         .references = static_cast<uint64_t>(file_segment.use_count()),
         .is_unbound = file_segment->is_unbound,
+        .queue_entry_type = file_segment->queue_iterator ? file_segment->queue_iterator->getType() : QueueEntryType::None,
     };
 }
 
@@ -914,10 +915,6 @@ void FileSegment::increasePriority()
         return;
     }
 
-    /// Priority can be increased only for downloaded file segments.
-    if (download_state != State::DOWNLOADED)
-        return;
-
     auto it = getQueueIterator();
     if (it)
     {
diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h
index 1183878fa1c..3cd5604c9bf 100644
--- a/src/Interpreters/Cache/FileSegment.h
+++ b/src/Interpreters/Cache/FileSegment.h
@@ -54,6 +54,7 @@ public:
     using Priority = IFileCachePriority;
     using State = FileSegmentState;
     using Info = FileSegmentInfo;
+    using QueueEntryType = FileCacheQueueEntryType;
 
     FileSegment(
         const Key & key_,
diff --git a/src/Interpreters/Cache/FileSegmentInfo.h b/src/Interpreters/Cache/FileSegmentInfo.h
index c1a38e28b1d..bb87cbbc15d 100644
--- a/src/Interpreters/Cache/FileSegmentInfo.h
+++ b/src/Interpreters/Cache/FileSegmentInfo.h
@@ -53,6 +53,14 @@ namespace DB
         Temporary,
     };
 
+    enum class FileCacheQueueEntryType
+    {
+        None,
+        LRU,
+        SLRU_Protected,
+        SLRU_Probationary,
+    };
+
     std::string toString(FileSegmentKind kind);
 
     struct FileSegmentInfo
@@ -69,5 +77,6 @@ namespace DB
         uint64_t cache_hits;
         uint64_t references;
         bool is_unbound;
+        FileCacheQueueEntryType queue_entry_type;
     };
 }
diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h
index dfc57328242..0f407a3082c 100644
--- a/src/Interpreters/Cache/IFileCachePriority.h
+++ b/src/Interpreters/Cache/IFileCachePriority.h
@@ -17,6 +17,7 @@ class IFileCachePriority : private boost::noncopyable
 {
 public:
     using Key = FileCacheKey;
+    using QueueEntryType = FileCacheQueueEntryType;
 
     struct Entry
     {
@@ -45,6 +46,8 @@ public:
         virtual void remove(const CacheGuard::Lock &) = 0;
 
         virtual void invalidate() = 0;
+
+        virtual QueueEntryType getType() const = 0;
     };
     using IteratorPtr = std::shared_ptr<Iterator>;
 
@@ -72,13 +75,7 @@ public:
 
     virtual void shuffle(const CacheGuard::Lock &) = 0;
 
-    struct QueueEntryDump
-    {
-        FileSegmentInfo info;
-        bool is_protected = false;
-    };
-    using QueueEntriesDumps = std::vector<QueueEntryDump>;
-    virtual QueueEntriesDumps dump(FileCache & cache, const CacheGuard::Lock &) = 0;
+    virtual std::vector<FileSegmentInfo> dump(FileCache & cache, const CacheGuard::Lock &) = 0;
 
     using FinalizeEvictionFunc = std::function<void(const CacheGuard::Lock & lk)>;
     virtual bool collectCandidatesForEviction(
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index ea0ce168913..a6abaea11c3 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -277,9 +277,9 @@ LRUFileCachePriority::LRUIterator LRUFileCachePriority::move(LRUIterator & it, L
     return LRUIterator(this, it.iterator);
 }
 
-IFileCachePriority::QueueEntriesDumps LRUFileCachePriority::dump(FileCache & cache, const CacheGuard::Lock & lock)
+std::vector<FileSegmentInfo> LRUFileCachePriority::dump(FileCache & cache, const CacheGuard::Lock & lock)
 {
-    QueueEntriesDumps res;
+    std::vector<FileSegmentInfo> res;
     iterate([&](LockedKey &, const FileSegmentMetadataPtr & segment_metadata)
     {
         res.emplace_back(FileSegment::getInfo(segment_metadata->file_segment, cache));
diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index a9d823313d9..5ff6c61eb4d 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -44,7 +44,7 @@ public:
 
     void shuffle(const CacheGuard::Lock &) override;
 
-    QueueEntriesDumps dump(FileCache & cache, const CacheGuard::Lock &) override;
+    std::vector<FileSegmentInfo> dump(FileCache & cache, const CacheGuard::Lock &) override;
 
     void pop(const CacheGuard::Lock & lock) { remove(queue.begin(), lock); }
 
@@ -99,6 +99,8 @@ public:
 
     void updateSize(int64_t size) override;
 
+    QueueEntryType getType() const override { return QueueEntryType::LRU; }
+
 private:
     void assertValid() const;
 
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index 88d90e4dede..7b3e666f595 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -222,16 +222,10 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach
     iterator.is_protected = true;
 }
 
-IFileCachePriority::QueueEntriesDumps SLRUFileCachePriority::dump(FileCache & cache, const CacheGuard::Lock & lock)
+std::vector<FileSegmentInfo> SLRUFileCachePriority::dump(FileCache & cache, const CacheGuard::Lock & lock)
 {
     auto res = probationary_queue.dump(cache, lock);
-    for (auto & entry : res)
-        entry.is_protected = false;
-
     auto part_res = protected_queue.dump(cache, lock);
-    for (auto & entry : part_res)
-        entry.is_protected = true;
-
     res.insert(res.end(), part_res.begin(), part_res.end());
     return res;
 }
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h
index b08fce50f00..45fc7ad8333 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.h
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.h
@@ -43,7 +43,7 @@ public:
 
     void shuffle(const CacheGuard::Lock &) override;
 
-    QueueEntriesDumps dump(FileCache & cache, const CacheGuard::Lock &) override;
+    std::vector<FileSegmentInfo> dump(FileCache & cache, const CacheGuard::Lock &) override;
 
 private:
     LRUFileCachePriority protected_queue;
@@ -72,7 +72,7 @@ public:
 
     void updateSize(int64_t size) override;
 
-    bool isProtected() const { return is_protected; }
+    QueueEntryType getType() const override { return is_protected ? QueueEntryType::SLRU_Protected : QueueEntryType::SLRU_Probationary; }
 
 private:
     void assertValid() const;
diff --git a/src/Interpreters/Cache/test b/src/Interpreters/Cache/test
deleted file mode 100644
index 5b1f397194b..00000000000
--- a/src/Interpreters/Cache/test
+++ /dev/null
@@ -1,6 +0,0 @@
-CREATE DICTIONARY dict (`id` String, `timestamp` DateTime)
-PRIMARY KEY id SOURCE(CLICKHOUSE(QUERY 'SELECT \'test\' as id, now() as timestamp')) LAYOUT(DIRECT());
-
-CREATE table t (id LowCardinality(String)) engine = MergeTree() ORDER BY id;
-
-CREATE VIEW v AS select dictGet(dict, 'timestamp', id) from t;
diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp
index 3bf7d1b9a14..1005e6090b8 100644
--- a/src/Interpreters/tests/gtest_filecache.cpp
+++ b/src/Interpreters/tests/gtest_filecache.cpp
@@ -84,11 +84,12 @@ using HolderPtr = FileSegmentsHolderPtr;
 
 fs::path caches_dir = fs::current_path() / "lru_cache_test";
 std::string cache_base_path = caches_dir / "cache1" / "";
+std::string cache_base_path2 = caches_dir / "cache2" / "";
 
 
 void assertEqual(const FileSegmentsHolderPtr & file_segments, const Ranges & expected_ranges, const States & expected_states = {})
 {
-    std::cerr << "File segments: ";
+    std::cerr << "\nFile segments: ";
     for (const auto & file_segment : *file_segments)
         std::cerr << file_segment->range().toString() << ", ";
 
@@ -116,9 +117,12 @@ void assertEqual(const FileSegmentsHolderPtr & file_segments, const Ranges & exp
 
 void assertEqual(const std::vector<FileSegment::Info> & file_segments, const Ranges & expected_ranges, const States & expected_states = {})
 {
-    std::cerr << "File segments: ";
+    std::cerr << "\nFile segments: ";
     for (const auto & file_segment : file_segments)
         std::cerr << FileSegment::Range(file_segment.range_left, file_segment.range_right).toString() << ", ";
+    std::cerr << "\nExpected: ";
+    for (const auto & r : expected_ranges)
+        std::cerr << r.toString() << ", ";
 
     ASSERT_EQ(file_segments.size(), expected_ranges.size());
 
@@ -142,51 +146,29 @@ void assertEqual(const std::vector<FileSegment::Info> & file_segments, const Ran
     }
 }
 
-void assertEqual(const IFileCachePriority::QueueEntriesDumps & file_segments, const Ranges & expected_ranges, const States & expected_states = {})
+void assertProtectedOrProbationary(const std::vector<FileSegmentInfo> & file_segments, const Ranges & expected, bool assert_protected)
 {
-    std::cerr << "File segments: ";
-    for (const auto & f : file_segments)
-    {
-        auto range = FileSegment::Range(f.info.range_left, f.info.range_right);
-        std::cerr << range.toString() << ", ";
-    }
-
-    ASSERT_EQ(file_segments.size(), expected_ranges.size());
-
-    if (!expected_states.empty())
-        ASSERT_EQ(file_segments.size(), expected_states.size());
-
-    auto get_expected_state = [&](size_t i)
-    {
-        if (expected_states.empty())
-            return State::DOWNLOADED;
-        else
-            return expected_states[i];
-    };
-
-    size_t i = 0;
-    for (const auto & f : file_segments)
-    {
-        auto range = FileSegment::Range(f.info.range_left, f.info.range_right);
-        ASSERT_EQ(range, expected_ranges[i]);
-        ASSERT_EQ(f.info.state, get_expected_state(i));
-        ++i;
-    }
-}
-
-void assertProtectedOrProbationary(const IFileCachePriority::QueueEntriesDumps & file_segments, const Ranges & expected, bool assert_protected)
-{
-    std::cerr << "File segments: ";
+    std::cerr << "\nFile segments: ";
     std::vector<Range> res;
     for (const auto & f : file_segments)
     {
-        auto range = FileSegment::Range(f.info.range_left, f.info.range_right);
-        std::cerr << range.toString() << ", ";
-        if ((f.is_protected && assert_protected) || (!f.is_protected && !assert_protected))
+        auto range = FileSegment::Range(f.range_left, f.range_right);
+        bool is_protected = (f.queue_entry_type == FileCacheQueueEntryType::SLRU_Protected);
+        bool is_probationary = (f.queue_entry_type == FileCacheQueueEntryType::SLRU_Probationary);
+        ASSERT_TRUE(is_probationary || is_protected);
+
+        std::cerr << fmt::format("{} (protected: {})", range.toString(), is_protected) <<  ", ";
+
+        if ((is_protected && assert_protected) || (!is_protected && !assert_protected))
         {
             res.push_back(range);
         }
     }
+    std::cerr << "\nExpected: ";
+    for (const auto & range : expected)
+    {
+        std::cerr << range.toString() << ", ";
+    }
 
     ASSERT_EQ(res.size(), expected.size());
     for (size_t i = 0; i < res.size(); ++i)
@@ -195,13 +177,15 @@ void assertProtectedOrProbationary(const IFileCachePriority::QueueEntriesDumps &
     }
 }
 
-void assertProtected(const IFileCachePriority::QueueEntriesDumps & file_segments, const Ranges & expected)
+void assertProtected(const std::vector<FileSegmentInfo> & file_segments, const Ranges & expected)
 {
+    std::cerr << "\nAssert protected";
     assertProtectedOrProbationary(file_segments, expected, true);
 }
 
-void assertProbationary(const IFileCachePriority::QueueEntriesDumps & file_segments, const Ranges & expected)
+void assertProbationary(const std::vector<FileSegmentInfo> & file_segments, const Ranges & expected)
 {
+    std::cerr << "\nAssert probationary";
     assertProtectedOrProbationary(file_segments, expected, false);
 }
 
@@ -251,6 +235,13 @@ void increasePriority(const HolderPtr & holder)
         it->increasePriority();
 }
 
+void increasePriority(const HolderPtr & holder, size_t pos)
+{
+    FileSegments::iterator it = holder->begin();
+    std::advance(it, pos);
+    (*it)->increasePriority();
+}
+
 class FileCacheTest : public ::testing::Test
 {
 public:
@@ -285,7 +276,10 @@ public:
 
         if (fs::exists(cache_base_path))
             fs::remove_all(cache_base_path);
+        if (fs::exists(cache_base_path2))
+            fs::remove_all(cache_base_path2);
         fs::create_directories(cache_base_path);
+        fs::create_directories(cache_base_path2);
     }
 
     void TearDown() override
@@ -1232,4 +1226,95 @@ TEST_F(FileCacheTest, SLRUPolicy)
         ASSERT_EQ(cache.getFileSegmentsNum(), 5);
         ASSERT_EQ(cache.getUsedCacheSize(), 22);
     }
+
+    {
+        ReadSettings read_settings;
+        read_settings.enable_filesystem_cache = true;
+        read_settings.local_fs_method = LocalFSReadMethod::pread;
+
+        auto write_file = [](const std::string & filename, const std::string & s)
+        {
+            std::string file_path = fs::current_path() / filename;
+            auto wb = std::make_unique<WriteBufferFromFile>(file_path, DBMS_DEFAULT_BUFFER_SIZE);
+            wb->write(s.data(), s.size());
+            wb->next();
+            wb->finalize();
+            return file_path;
+        };
+
+        DB::FileCacheSettings settings2;
+        settings2.base_path = cache_base_path2;
+        settings2.max_file_segment_size = 5;
+        settings2.max_size = 30;
+        settings2.max_elements = 6;
+        settings2.boundary_alignment = 1;
+        settings2.cache_policy = "SLRU";
+        settings2.slru_size_ratio = 0.5;
+
+        auto cache = std::make_shared<DB::FileCache>("slru_2", settings2);
+        cache->initialize();
+
+        auto read_and_check = [&](const std::string & file, const FileCacheKey & key, const std::string & expect_result)
+        {
+            auto read_buffer_creator = [&]()
+            {
+                return createReadBufferFromFileBase(file, read_settings, std::nullopt, std::nullopt);
+            };
+
+            auto cached_buffer = std::make_shared<CachedOnDiskReadBufferFromFile>(
+                file, key, cache, read_buffer_creator, read_settings, "test", expect_result.size(), false, false, std::nullopt, nullptr);
+
+            WriteBufferFromOwnString result;
+            copyData(*cached_buffer, result);
+            ASSERT_EQ(result.str(), expect_result);
+        };
+
+        std::string data1(15, '*');
+        auto file1 = write_file("test1", data1);
+        auto key1 = cache->createKeyForPath(file1);
+
+        read_and_check(file1, key1, data1);
+
+        assertEqual(cache->dumpQueue(), { Range(0, 4), Range(5, 9), Range(10, 14) });
+        assertProbationary(cache->dumpQueue(), { Range(0, 4), Range(5, 9), Range(10, 14) });
+        assertProtected(cache->dumpQueue(), Ranges{});
+
+        read_and_check(file1, key1, data1);
+
+        assertEqual(cache->dumpQueue(), { Range(0, 4), Range(5, 9), Range(10, 14) });
+        assertProbationary(cache->dumpQueue(), Ranges{});
+        assertProtected(cache->dumpQueue(), { Range(0, 4), Range(5, 9), Range(10, 14) });
+
+        std::string data2(10, '*');
+        auto file2 = write_file("test2", data2);
+        auto key2 = cache->createKeyForPath(file2);
+
+        read_and_check(file2, key2, data2);
+
+        auto dump = cache->dumpQueue();
+        assertEqual(dump, { Range(0, 4), Range(5, 9), Range(0, 4), Range(5, 9), Range(10, 14) });
+
+        ASSERT_EQ(dump[0].key, key2);
+        ASSERT_EQ(dump[1].key, key2);
+        ASSERT_EQ(dump[2].key, key1);
+        ASSERT_EQ(dump[3].key, key1);
+        ASSERT_EQ(dump[4].key, key1);
+
+        assertProbationary(cache->dumpQueue(), { Range(0, 4), Range(5, 9) });
+        assertProtected(cache->dumpQueue(), { Range(0, 4), Range(5, 9), Range(10, 14) });
+
+        read_and_check(file2, key2, data2);
+
+        dump = cache->dumpQueue();
+        assertEqual(dump, { Range(0, 4), Range(5, 9), Range(10, 14), Range(0, 4), Range(5, 9)  });
+
+        ASSERT_EQ(dump[0].key, key1);
+        ASSERT_EQ(dump[1].key, key1);
+        ASSERT_EQ(dump[2].key, key1);
+        ASSERT_EQ(dump[3].key, key2);
+        ASSERT_EQ(dump[4].key, key2);
+
+        assertProbationary(cache->dumpQueue(), { Range(0, 4), Range(5, 9) });
+        assertProtected(cache->dumpQueue(), { Range(10, 14), Range(0, 4), Range(5, 9)  });
+    }
 }

From 8ebd5a2d6c75d81a79c07a2463179314756852b9 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@clickhouse.com>
Date: Thu, 7 Dec 2023 12:10:30 +0000
Subject: [PATCH 081/213] Fix type correction in HashJoin for nested low
 cardinality

---
 src/Interpreters/HashJoin.cpp                 | 71 +++++++++++--------
 src/Interpreters/JoinUtils.cpp                | 10 +--
 src/Interpreters/JoinUtils.h                  |  1 -
 ...oin_with_totals_and_subquery_bug.reference | 17 +++++
 ...2516_join_with_totals_and_subquery_bug.sql | 46 +++++++++++-
 5 files changed, 103 insertions(+), 42 deletions(-)

diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp
index 060fe95958f..71e9d4bba80 100644
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@@ -32,7 +32,7 @@
 #include <Common/assert_cast.h>
 
 #include <Functions/FunctionHelpers.h>
-
+#include <Interpreters/castColumn.h>
 
 namespace DB
 {
@@ -217,7 +217,7 @@ static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nulla
     }
 }
 
-static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const ColumnUInt8 & negative_null_map)
+static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map)
 {
     if (nullable)
     {
@@ -1548,6 +1548,40 @@ IColumn::Filter switchJoinRightColumns(
     }
 }
 
+/** Since we do not store right key columns,
+  * this function is used to copy left key columns to right key columns.
+  * If the user requests some right columns, we just copy left key columns to right, since they are equal.
+  * Example: SELECT t1.key, t2.key FROM t1 FULL JOIN t2 ON t1.key = t2.key;
+  * In that case for matched rows in t2.key we will use values from t1.key.
+  * However, in some cases we might need to adjust the type of the column, e.g. t1.key :: LowCardinality(String) and t2.key :: String
+  * Also, the nullability of the column might be different.
+  * Returns the right column with the necessary adjustments applied.
+  */
+ColumnWithTypeAndName copyLeftKeyColumnToRight(
+    const DataTypePtr & right_key_type, const String & renamed_right_column, const ColumnWithTypeAndName & left_column, const IColumn::Filter * null_map_filter = nullptr)
+{
+    ColumnWithTypeAndName right_column = left_column;
+    right_column.name = renamed_right_column;
+
+    if (null_map_filter)
+        right_column.column = JoinCommon::filterWithBlanks(right_column.column, *null_map_filter);
+
+    bool should_be_nullable = isNullableOrLowCardinalityNullable(right_key_type);
+    if (null_map_filter)
+        correctNullabilityInplace(right_column, should_be_nullable, *null_map_filter);
+    else
+        correctNullabilityInplace(right_column, should_be_nullable);
+
+    if (!right_column.type->equals(*right_key_type))
+    {
+        right_column.column = castColumnAccurate(right_column, right_key_type);
+        right_column.type = right_key_type;
+    }
+
+    right_column.column = right_column.column->convertToFullColumnIfConst();
+    return right_column;
+}
+
 } /// nameless
 
 template <JoinKind KIND, JoinStrictness STRICTNESS, typename Maps>
@@ -1614,31 +1648,19 @@ void HashJoin::joinBlockImpl(
             // renamed ???
             if (!block.findByName(right_key.name))
             {
-                const auto & left_name = required_right_keys_sources[i];
-
                 /// asof column is already in block.
                 if (join_features.is_asof_join && right_key.name == table_join->getOnlyClause().key_names_right.back())
                     continue;
 
-                const auto & col = block.getByName(left_name);
-                bool is_nullable = JoinCommon::isNullable(right_key.type);
-                auto right_col_name = getTableJoin().renamedRightColumnName(right_key.name);
-                ColumnWithTypeAndName right_col(col.column, col.type, right_col_name);
-                if (right_col.type->lowCardinality() != right_key.type->lowCardinality())
-                    JoinCommon::changeLowCardinalityInplace(right_col);
-                correctNullabilityInplace(right_col, is_nullable);
+                const auto & left_column = block.getByName(required_right_keys_sources[i]);
+                const auto & right_col_name = getTableJoin().renamedRightColumnName(right_key.name);
+                auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column);
                 block.insert(std::move(right_col));
             }
         }
     }
     else if (has_required_right_keys)
     {
-        /// Some trash to represent IColumn::Filter as ColumnUInt8 needed for ColumnNullable::applyNullMap()
-        auto null_map_filter_ptr = ColumnUInt8::create();
-        ColumnUInt8 & null_map_filter = assert_cast<ColumnUInt8 &>(*null_map_filter_ptr);
-        null_map_filter.getData().swap(row_filter);
-        const IColumn::Filter & filter = null_map_filter.getData();
-
         /// Add join key columns from right block if needed.
         for (size_t i = 0; i < required_right_keys.columns(); ++i)
         {
@@ -1646,21 +1668,12 @@ void HashJoin::joinBlockImpl(
             auto right_col_name = getTableJoin().renamedRightColumnName(right_key.name);
             if (!block.findByName(right_col_name))
             {
-                const auto & left_name = required_right_keys_sources[i];
-
                 /// asof column is already in block.
                 if (join_features.is_asof_join && right_key.name == table_join->getOnlyClause().key_names_right.back())
                     continue;
 
-                const auto & col = block.getByName(left_name);
-                bool is_nullable = JoinCommon::isNullable(right_key.type);
-
-                ColumnPtr thin_column = JoinCommon::filterWithBlanks(col.column, filter);
-
-                ColumnWithTypeAndName right_col(thin_column, col.type, right_col_name);
-                if (right_col.type->lowCardinality() != right_key.type->lowCardinality())
-                    JoinCommon::changeLowCardinalityInplace(right_col);
-                correctNullabilityInplace(right_col, is_nullable, null_map_filter);
+                const auto & left_column = block.getByName(required_right_keys_sources[i]);
+                auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &row_filter);
                 block.insert(std::move(right_col));
 
                 if constexpr (join_features.need_replication)
@@ -2179,7 +2192,7 @@ BlocksList HashJoin::releaseJoinedBlocks(bool restructure)
         for (const auto & sample_column : right_sample_block)
         {
             positions.emplace_back(tmp_block.getPositionByName(sample_column.name));
-            is_nullable.emplace_back(JoinCommon::isNullable(sample_column.type));
+            is_nullable.emplace_back(isNullableOrLowCardinalityNullable(sample_column.type));
         }
     }
 
diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp
index 33c9dfa76ca..be5ee80bd53 100644
--- a/src/Interpreters/JoinUtils.cpp
+++ b/src/Interpreters/JoinUtils.cpp
@@ -120,19 +120,11 @@ bool canBecomeNullable(const DataTypePtr & type)
     return can_be_inside;
 }
 
-bool isNullable(const DataTypePtr & type)
-{
-    bool is_nullable = type->isNullable();
-    if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type.get()))
-        is_nullable |= low_cardinality_type->getDictionaryType()->isNullable();
-    return is_nullable;
-}
-
 /// Add nullability to type.
 /// Note: LowCardinality(T) transformed to LowCardinality(Nullable(T))
 DataTypePtr convertTypeToNullable(const DataTypePtr & type)
 {
-    if (isNullable(type))
+    if (isNullableOrLowCardinalityNullable(type))
         return type;
 
     if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type.get()))
diff --git a/src/Interpreters/JoinUtils.h b/src/Interpreters/JoinUtils.h
index f112ca22e5b..7daed6b7f7e 100644
--- a/src/Interpreters/JoinUtils.h
+++ b/src/Interpreters/JoinUtils.h
@@ -59,7 +59,6 @@ private:
 };
 
 
-bool isNullable(const DataTypePtr & type);
 bool canBecomeNullable(const DataTypePtr & type);
 DataTypePtr convertTypeToNullable(const DataTypePtr & type);
 void convertColumnToNullable(ColumnWithTypeAndName & column);
diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference
index 19da8828c30..51e8394a2f7 100644
--- a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference
+++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference
@@ -9,3 +9,20 @@
 \N
 
 100000000000000000000
+---
+['0']	2	['0']
+['0']	2	['0']
+['1']	1	['1']
+
+[]	3	[]
+---
+['0']	2	['0']	2
+['1']	1	['1']	1
+
+[]	3	[]	3
+---
+['0']	['0']	2
+['0']	['0']	2
+['1']	['1']	1
+
+[]	[]	3
diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql
index 6b58d737a3e..243dceffc43 100644
--- a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql
+++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql
@@ -1,5 +1,3 @@
-SET allow_experimental_analyzer = 1;
-
 SELECT *
 FROM
 (
@@ -34,7 +32,6 @@ INNER JOIN
 ) AS t2 USING (a)
 SETTINGS allow_experimental_analyzer=1;
 
-
 SELECT a
 FROM
 (
@@ -72,3 +69,46 @@ ALL LEFT JOIN
         WITH TOTALS
 ) AS js2 USING (a)
 ORDER BY b ASC NULLS FIRST;
+
+SELECT '---';
+SELECT
+    *
+FROM (
+    SELECT ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, count() FROM numbers(3) GROUP BY item_id
+    WITH TOTALS
+) AS l
+FULL JOIN (
+    SELECT ([toString(number % 2)] :: Array(String)) AS item_id FROM numbers(3)
+) AS r
+ON l.item_id = r.item_id
+ORDER BY 1,2,3
+;
+
+SELECT '---';
+SELECT
+    *
+FROM (
+    SELECT ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, count() FROM numbers(3) GROUP BY item_id
+    WITH TOTALS
+) AS l
+FULL JOIN (
+    SELECT ([toString(number % 2)] :: Array(String)) AS item_id, count() FROM numbers(3) GROUP BY item_id
+    WITH TOTALS
+) AS r
+ON l.item_id = r.item_id
+ORDER BY 1,2,3
+;
+
+SELECT '---';
+SELECT
+    *
+FROM (
+    SELECT ([toString(number % 2)] :: Array(String)) AS item_id FROM numbers(3)
+) AS l
+FULL JOIN (
+    SELECT ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, count() FROM numbers(3) GROUP BY item_id
+    WITH TOTALS
+) AS r
+ON l.item_id = r.item_id
+ORDER BY 1,2,3
+;

From ff0340b0fca45ec77ac2f7273b71875c13e357ce Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 8 Dec 2023 18:18:45 +0000
Subject: [PATCH 082/213] Release memory for aggregation earlier.

---
 src/Processors/Transforms/AggregatingTransform.cpp  | 13 +++++++++++++
 .../02933_group_by_memory_usage.reference           |  3 +++
 .../0_stateless/02933_group_by_memory_usage.sh      | 13 +++++++++++++
 3 files changed, 29 insertions(+)
 create mode 100644 tests/queries/0_stateless/02933_group_by_memory_usage.reference
 create mode 100755 tests/queries/0_stateless/02933_group_by_memory_usage.sh

diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp
index bf475c57d36..6bcd9b2f686 100644
--- a/src/Processors/Transforms/AggregatingTransform.cpp
+++ b/src/Processors/Transforms/AggregatingTransform.cpp
@@ -123,7 +123,10 @@ protected:
         UInt32 bucket_num = shared_data->next_bucket_to_merge.fetch_add(1);
 
         if (bucket_num >= NUM_BUCKETS)
+        {
+            data.reset();
             return {};
+        }
 
         Block block = params->aggregator.mergeAndConvertOneBucketToBlock(*data, arena, params->final, bucket_num, &shared_data->is_cancelled);
         Chunk chunk = convertToChunk(block);
@@ -170,6 +173,8 @@ protected:
             return convertToChunk(block);
         }
 
+        variant.reset();
+
         return {};
     }
 
@@ -489,6 +494,7 @@ private:
             single_level_chunks.emplace_back(convertToChunk(block));
 
         finished = true;
+        data.reset();
     }
 
     void createSources()
@@ -504,6 +510,8 @@ private:
 
             processors.emplace_back(std::move(source));
         }
+
+        data.reset();
     }
 };
 
@@ -710,7 +718,10 @@ void AggregatingTransform::initGenerate()
     }
 
     if (many_data->num_finished.fetch_add(1) + 1 < many_data->variants.size())
+    {
+        many_data.reset();
         return;
+    }
 
     if (!params->aggregator.hasTemporaryData())
     {
@@ -807,6 +818,8 @@ void AggregatingTransform::initGenerate()
 
         processors = Pipe::detachProcessors(std::move(pipe));
     }
+
+    many_data.reset();
 }
 
 }
diff --git a/tests/queries/0_stateless/02933_group_by_memory_usage.reference b/tests/queries/0_stateless/02933_group_by_memory_usage.reference
new file mode 100644
index 00000000000..1481fcaa297
--- /dev/null
+++ b/tests/queries/0_stateless/02933_group_by_memory_usage.reference
@@ -0,0 +1,3 @@
+Spin up a long running query
+1	1	1	1	1
+0
diff --git a/tests/queries/0_stateless/02933_group_by_memory_usage.sh b/tests/queries/0_stateless/02933_group_by_memory_usage.sh
new file mode 100755
index 00000000000..96ed2f3f0da
--- /dev/null
+++ b/tests/queries/0_stateless/02933_group_by_memory_usage.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+# Tags: long, no-random-settings
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+query_id="group-by-mem-usage-$CLICKHOUSE_DATABASE"
+
+echo "Spin up a long running query"
+${CLICKHOUSE_CLIENT} --query "with q as (select length(groupArray(toString(number))) as x  from numbers_mt(2e6) group by number order by x limit 1), q1 as (select * from q), q2 as (select * from q), q3 as (select * from q), q4 as (select * from q) select * from q, q1, q2, q3, q4 settings max_bytes_before_external_group_by='1G', max_memory_usage='2G'" --query_id "$query_id"
+${CLICKHOUSE_CLIENT} --query "system flush logs"
+${CLICKHOUSE_CLIENT} --query "select ProfileEvents['ExternalAggregationWritePart'] from system.query_log where type = 'QueryFinish' and query_id = '$query_id' and event_date >= today() - 1"

From 881e58dfcbcc9f5326a9650b7af373fe8f853136 Mon Sep 17 00:00:00 2001
From: Han Fei <hanfei19910905@gmail.com>
Date: Fri, 8 Dec 2023 21:46:31 +0100
Subject: [PATCH 083/213] support soft limit ratio

---
 programs/keeper/Keeper.cpp                    | 30 +++++++++++++++++++
 src/Coordination/CoordinationSettings.h       |  1 -
 src/Coordination/KeeperContext.cpp            |  8 +++++
 src/Coordination/KeeperContext.h              |  5 ++++
 src/Coordination/KeeperDispatcher.cpp         |  4 ++-
 .../configs/keeper_config1.xml                |  2 +-
 .../configs/keeper_config2.xml                |  2 +-
 .../configs/keeper_config3.xml                |  3 +-
 8 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index e04e669abae..9db9e1a3a26 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -14,6 +14,7 @@
 #include <Common/assertProcessUserMatchesDataOwner.h>
 #include <Common/makeSocketAddress.h>
 #include <Server/waitServersToFinish.h>
+#include <base/getMemoryAmount.h>
 #include <base/scope_guard.h>
 #include <base/safeExit.h>
 #include <Poco/Net/NetException.h>
@@ -289,6 +290,33 @@ try
     if (!config().has("keeper_server"))
         throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Keeper configuration (<keeper_server> section) not found in config");
 
+    auto updateMemorySoftLimitInConfig = [&](Poco::Util::AbstractConfiguration & config)
+    {
+        UInt64 memory_soft_limit = 0;
+        if (config.has("keeper_server.max_memory_usage_soft_limit"))
+        {
+            memory_soft_limit = config.getUInt64("keeper_server.max_memory_usage_soft_limit");
+        }
+
+        /// No explicit limit (key absent or 0): derive one as ratio * physical memory; the ratio defaults to 0.9.
+        if (memory_soft_limit == 0)
+        {
+            Float64 ratio = 0.9;
+            if (config.has("keeper_server.max_memory_usage_soft_limit_ratio"))
+                ratio = config.getDouble("keeper_server.max_memory_usage_soft_limit_ratio");
+            /// If the ratio is not positive or the memory amount is 0, nothing is stored and the limit stays 0.
+            size_t physical_server_memory = getMemoryAmount();
+            if (ratio > 0 && physical_server_memory > 0)
+            {
+                memory_soft_limit = static_cast<UInt64>(physical_server_memory * ratio);
+                config.setUInt64("keeper_server.max_memory_usage_soft_limit", memory_soft_limit);
+            }
+        }
+        LOG_INFO(log, "keeper_server.max_memory_usage_soft_limit is set to {}", formatReadableSizeWithBinarySuffix(memory_soft_limit));
+    };
+
+    updateMemorySoftLimitInConfig(config());
+
     std::string path;
 
     if (config().has("keeper_server.storage_path"))
@@ -492,6 +520,8 @@ try
         {
             updateLevels(*config, logger());
 
+            updateMemorySoftLimitInConfig(*config);
+
             if (config->has("keeper_server"))
                 global_context->updateKeeperConfiguration(*config);
 
diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h
index 13ef304b353..a58f2b04797 100644
--- a/src/Coordination/CoordinationSettings.h
+++ b/src/Coordination/CoordinationSettings.h
@@ -43,7 +43,6 @@ struct Settings;
     M(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \
     M(UInt64, max_flush_batch_size, 1000, "Max size of batch of requests that can be flushed together", 0) \
     M(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \
-    M(UInt64, max_memory_usage_soft_limit, 0, "Soft limit in bytes of keeper memory usage", 0) \
     M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
     M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
     M(Bool, compress_logs, false, "Write compressed coordination logs in ZSTD format", 0) \
diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp
index 7e0b75a6353..6bb5b066d9f 100644
--- a/src/Coordination/KeeperContext.cpp
+++ b/src/Coordination/KeeperContext.cpp
@@ -59,6 +59,8 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config,
         }
     }
 
+    updateKeeperMemorySoftLimit(config);
+
     digest_enabled = config.getBool("keeper_server.digest_enabled", false);
     ignore_system_path_on_startup = config.getBool("keeper_server.ignore_system_path_on_startup", false);
 
@@ -375,4 +377,10 @@ void KeeperContext::initializeFeatureFlags(const Poco::Util::AbstractConfigurati
     feature_flags.logFlags(&Poco::Logger::get("KeeperContext"));
 }
 
+void KeeperContext::updateKeeperMemorySoftLimit(const Poco::Util::AbstractConfiguration & config)
+{
+    if (config.hasProperty("keeper_server.max_memory_usage_soft_limit"))
+        memory_soft_limit = config.getUInt64("keeper_server.max_memory_usage_soft_limit");
+}
+
 }
diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h
index 1af34b19ccf..c1c34db2c4b 100644
--- a/src/Coordination/KeeperContext.h
+++ b/src/Coordination/KeeperContext.h
@@ -53,6 +53,9 @@ public:
 
     constexpr KeeperDispatcher * getDispatcher() const { return dispatcher; }
 
+    UInt64 getKeeperMemorySoftLimit() const { return memory_soft_limit; }
+    void updateKeeperMemorySoftLimit(const Poco::Util::AbstractConfiguration & config);
+
     /// set to true when we have preprocessed or committed all the logs
     /// that were already present locally during startup
     std::atomic<bool> local_logs_preprocessed = false;
@@ -92,6 +95,8 @@ private:
 
     KeeperFeatureFlags feature_flags;
     KeeperDispatcher * dispatcher{nullptr};
+
+    std::atomic<UInt64> memory_soft_limit = 0;
 };
 
 using KeeperContextPtr = std::shared_ptr<KeeperContext>;
diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp
index 1299e9c9f20..dcd22552fe3 100644
--- a/src/Coordination/KeeperDispatcher.cpp
+++ b/src/Coordination/KeeperDispatcher.cpp
@@ -143,7 +143,7 @@ void KeeperDispatcher::requestThread()
                 if (shutdown_called)
                     break;
 
-                Int64 mem_soft_limit = configuration_and_settings->coordination_settings->max_memory_usage_soft_limit;
+                Int64 mem_soft_limit = keeper_context->getKeeperMemorySoftLimit();
                 if (configuration_and_settings->standalone_keeper && mem_soft_limit > 0 && total_memory_tracker.get() >= mem_soft_limit && checkIfRequestIncreaseMem(request.request))
                 {
                     LOG_TRACE(log, "Processing requests refused because of max_memory_usage_soft_limit {}, the total used memory is {}, request type is {}", mem_soft_limit, total_memory_tracker.get(), request.request->getOpNum());
@@ -930,6 +930,8 @@ void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfigurati
                 throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push configuration update to queue");
 
     snapshot_s3.updateS3Configuration(config, macros);
+
+    keeper_context->updateKeeperMemorySoftLimit(config);
 }
 
 void KeeperDispatcher::updateKeeperStatLatency(uint64_t process_time_ms)
diff --git a/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config1.xml b/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config1.xml
index fe45d09d915..642cf16414e 100644
--- a/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config1.xml
+++ b/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config1.xml
@@ -15,6 +15,7 @@
             <value>az-zoo1</value>
         </availability_zone>
         <server_id>1</server_id>
+        <max_memory_usage_soft_limit>200000000</max_memory_usage_soft_limit>
 
         <coordination_settings>
             <operation_timeout_ms>10000</operation_timeout_ms>
@@ -23,7 +24,6 @@
             <force_sync>false</force_sync>
             <election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms>
             <election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms>
-	    <max_memory_usage_soft_limit>200000000</max_memory_usage_soft_limit>
 
             <async_replication>1</async_replication>
         </coordination_settings>
diff --git a/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config2.xml b/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config2.xml
index f7f6a5718b5..25ececea3e8 100644
--- a/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config2.xml
+++ b/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config2.xml
@@ -16,6 +16,7 @@
             <value>az-zoo2</value>
             <enable_auto_detection_on_cloud>1</enable_auto_detection_on_cloud>
         </availability_zone>
+        <max_memory_usage_soft_limit>20000000</max_memory_usage_soft_limit>
 
         <coordination_settings>
             <operation_timeout_ms>10000</operation_timeout_ms>
@@ -24,7 +25,6 @@
             <force_sync>false</force_sync>
             <election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms>
             <election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms>
-	    <max_memory_usage_soft_limit>20000000</max_memory_usage_soft_limit>
 
             <async_replication>1</async_replication>
         </coordination_settings>
diff --git a/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config3.xml b/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config3.xml
index 82345aebc46..81e343b77c9 100644
--- a/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config3.xml
+++ b/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config3.xml
@@ -13,6 +13,8 @@
         <tcp_port>2181</tcp_port>
         <server_id>3</server_id>
 
+        <max_memory_usage_soft_limit>20000000</max_memory_usage_soft_limit>
+
         <coordination_settings>
             <operation_timeout_ms>10000</operation_timeout_ms>
             <session_timeout_ms>15000</session_timeout_ms>
@@ -20,7 +22,6 @@
             <force_sync>false</force_sync>
             <election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms>
             <election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms>
-	    <max_memory_usage_soft_limit>20000000</max_memory_usage_soft_limit>
 
             <async_replication>1</async_replication>
         </coordination_settings>

From a76d9c259f690baee094e024d78059db40b4e644 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 8 Dec 2023 21:06:18 +0000
Subject: [PATCH 084/213] Fixing style

---
 src/Processors/Transforms/AggregatingTransform.cpp       | 2 +-
 tests/queries/0_stateless/02933_group_by_memory_usage.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp
index 6bcd9b2f686..dacd6261477 100644
--- a/src/Processors/Transforms/AggregatingTransform.cpp
+++ b/src/Processors/Transforms/AggregatingTransform.cpp
@@ -289,7 +289,7 @@ public:
 
     void work() override
     {
-        if (data->empty())
+        if (!data || data->empty())
         {
             finished = true;
             return;
diff --git a/tests/queries/0_stateless/02933_group_by_memory_usage.sh b/tests/queries/0_stateless/02933_group_by_memory_usage.sh
index 96ed2f3f0da..bb1bbbf16a2 100755
--- a/tests/queries/0_stateless/02933_group_by_memory_usage.sh
+++ b/tests/queries/0_stateless/02933_group_by_memory_usage.sh
@@ -10,4 +10,4 @@ query_id="group-by-mem-usage-$CLICKHOUSE_DATABASE"
 echo "Spin up a long running query"
 ${CLICKHOUSE_CLIENT} --query "with q as (select length(groupArray(toString(number))) as x  from numbers_mt(2e6) group by number order by x limit 1), q1 as (select * from q), q2 as (select * from q), q3 as (select * from q), q4 as (select * from q) select * from q, q1, q2, q3, q4 settings max_bytes_before_external_group_by='1G', max_memory_usage='2G'" --query_id "$query_id"
 ${CLICKHOUSE_CLIENT} --query "system flush logs"
-${CLICKHOUSE_CLIENT} --query "select ProfileEvents['ExternalAggregationWritePart'] from system.query_log where type = 'QueryFinish' and query_id = '$query_id' and event_date >= today() - 1"
+${CLICKHOUSE_CLIENT} --query "select ProfileEvents['ExternalAggregationWritePart'] from system.query_log where current_database = currentDatabase() and type = 'QueryFinish' and query_id = '$query_id' and event_date >= today() - 1"

From 5e1dccf1f423cb9d2738c89b858ec1eaa5698601 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 8 Dec 2023 22:36:01 +0000
Subject: [PATCH 085/213] Fixing tests

---
 src/Processors/Transforms/AggregatingTransform.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp
index dacd6261477..ec8ea9396e4 100644
--- a/src/Processors/Transforms/AggregatingTransform.cpp
+++ b/src/Processors/Transforms/AggregatingTransform.cpp
@@ -289,7 +289,7 @@ public:
 
     void work() override
     {
-        if (!data || data->empty())
+        if (data->empty())
         {
             finished = true;
             return;
@@ -424,7 +424,7 @@ private:
             return Status::Finished;
         }
 
-        return has_rows ? Status::PortFull : Status::Ready;
+        return has_rows ? Status::PortFull : Status::NeedData;
     }
 
     AggregatingTransformParamsPtr params;

From 63619b59353699b5975acedffe0f2ca85e3985d7 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Sat, 9 Dec 2023 17:57:32 +0000
Subject: [PATCH 086/213] allow IPv6 to UInt128 conversion and binary
 arithmetic

---
 src/Functions/FunctionBinaryArithmetic.h | 30 ++++++++++++++++++++++++
 src/Functions/FunctionsConversion.h      | 12 ++++++++++
 2 files changed, 42 insertions(+)

diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h
index 33f62b8da8c..1b2519d1ec5 100644
--- a/src/Functions/FunctionBinaryArithmetic.h
+++ b/src/Functions/FunctionBinaryArithmetic.h
@@ -1483,6 +1483,17 @@ public:
             return getReturnTypeImplStatic(new_arguments, context);
         }
 
+        /// Special case - one or both arguments are IPv6
+        if (isIPv6(arguments[0]) || isIPv6(arguments[1]))
+        {
+            DataTypes new_arguments {
+                    isIPv6(arguments[0]) ? std::make_shared<DataTypeUInt128>() : arguments[0],
+                    isIPv6(arguments[1]) ? std::make_shared<DataTypeUInt128>() : arguments[1],
+            };
+
+            return getReturnTypeImplStatic(new_arguments, context);
+        }
+
 
         if constexpr (is_plus || is_minus)
         {
@@ -2181,6 +2192,25 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A
             return executeImpl2(new_arguments, result_type, input_rows_count, right_nullmap);
         }
 
+        /// Special case - one or both arguments are IPv6
+        if (isIPv6(arguments[0].type) || isIPv6(arguments[1].type))
+        {
+            ColumnsWithTypeAndName new_arguments {
+                {
+                    isIPv6(arguments[0].type) ? castColumn(arguments[0], std::make_shared<DataTypeUInt128>()) : arguments[0].column,
+                    isIPv6(arguments[0].type) ? std::make_shared<DataTypeUInt128>() : arguments[0].type,
+                    arguments[0].name,
+                },
+                {
+                    isIPv6(arguments[1].type) ? castColumn(arguments[1], std::make_shared<DataTypeUInt128>()) : arguments[1].column,
+                    isIPv6(arguments[1].type) ? std::make_shared<DataTypeUInt128>() : arguments[1].type,
+                    arguments[1].name
+                }
+            };
+
+            return executeImpl2(new_arguments, result_type, input_rows_count, right_nullmap);
+        }
+
         const auto * const left_generic = left_argument.type.get();
         const auto * const right_generic = right_argument.type.get();
         ColumnPtr res;
diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h
index d7c2c70884b..bef1e7b420a 100644
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@@ -221,6 +221,18 @@ struct ConvertImpl
                     continue;
                 }
 
+                if constexpr (std::is_same_v<FromDataType, DataTypeIPv6> && std::is_same_v<ToDataType, DataTypeUInt128>)
+                {
+                    static_assert(
+                        std::is_same_v<DataTypeUInt128::FieldType, DataTypeIPv6::FieldType::UnderlyingType>,
+                        "UInt128 and IPv6 types must be same");
+                    /// Byte-swap each item and exchange the two items (presumably reversing the stored byte order - confirm).
+                    vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]);
+                    vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]);
+
+                    continue;
+                }
+
                 if constexpr (std::is_same_v<FromDataType, DataTypeUUID> != std::is_same_v<ToDataType, DataTypeUUID>)
                 {
                     throw Exception(ErrorCodes::NOT_IMPLEMENTED,

From 3ac3a06561f8f98fd3f38b9048a5b1a44c263377 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 9 Dec 2023 23:31:05 +0100
Subject: [PATCH 087/213] Add ClickBench to CI

---
 .github/workflows/pull_request.yml          |  25 ++
 docker/images.json                          |   5 +
 docker/test/clickbench/Dockerfile           |  10 +
 docker/test/clickbench/create.sql           | 112 +++++++++
 docker/test/clickbench/queries.sql          |  43 ++++
 docker/test/clickbench/run.sh               |  52 ++++
 tests/ci/clickbench.py                      | 261 ++++++++++++++++++++
 tests/ci/fast_test_check.py                 |   2 +-
 tests/ci/functional_test_check.py           |   2 +-
 tests/ci/integration_test_check.py          |   2 +-
 tests/ci/test_docker.py                     |   6 +
 tests/ci/tests/docker_images_for_tests.json |   5 +
 12 files changed, 522 insertions(+), 3 deletions(-)
 create mode 100644 docker/test/clickbench/Dockerfile
 create mode 100644 docker/test/clickbench/create.sql
 create mode 100644 docker/test/clickbench/queries.sql
 create mode 100755 docker/test/clickbench/run.sh
 create mode 100644 tests/ci/clickbench.py

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index a6631a93766..3db382b1fd6 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -701,6 +701,31 @@ jobs:
         cd "$REPO_COPY/tests/ci"
         python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
 ##############################################################################################
+########################### ClickBench #######################################################
+##############################################################################################
+  ClickBenchAMD64:
+    needs: [BuilderDebRelease]
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: ClickBench (amd64)
+      runner_type: func-tester
+      additional_envs: |
+        KILL_TIMEOUT=1800
+      run_command: |
+        cd "$REPO_COPY/tests/ci"
+        python3 clickbench.py "$CHECK_NAME" "$KILL_TIMEOUT"
+  ClickBenchAarch64:
+    needs: [ BuilderDebAarch64 ]
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: ClickBench (aarch64)
+      runner_type: func-tester-aarch64
+      additional_envs: |
+        KILL_TIMEOUT=1800
+      run_command: |
+        cd "$REPO_COPY/tests/ci"
+        python3 clickbench.py "$CHECK_NAME" "$KILL_TIMEOUT"
+##############################################################################################
 ######################################### STRESS TESTS #######################################
 ##############################################################################################
   StressTestAsan:
diff --git a/docker/images.json b/docker/images.json
index 1535715648c..d2f098f53d7 100644
--- a/docker/images.json
+++ b/docker/images.json
@@ -125,6 +125,7 @@
             "docker/test/server-jepsen",
             "docker/test/sqllogic",
             "docker/test/sqltest",
+            "docker/test/clickbench",
             "docker/test/stateless"
          ]
     },
@@ -145,6 +146,10 @@
         "name": "clickhouse/server-jepsen-test",
         "dependent": []
     },
+    "docker/test/clickbench": {
+        "name": "clickhouse/clickbench",
+        "dependent": []
+    },
     "docker/test/install/deb": {
         "name": "clickhouse/install-deb-test",
         "dependent": []
diff --git a/docker/test/clickbench/Dockerfile b/docker/test/clickbench/Dockerfile
new file mode 100644
index 00000000000..0b6b1736e03
--- /dev/null
+++ b/docker/test/clickbench/Dockerfile
@@ -0,0 +1,10 @@
+ARG FROM_TAG=latest
+FROM clickhouse/test-base:$FROM_TAG
+
+ENV TZ=Europe/Amsterdam
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+COPY *.sh /
+COPY *.sql /
+
+CMD ["/bin/bash", "/run.sh"]
diff --git a/docker/test/clickbench/create.sql b/docker/test/clickbench/create.sql
new file mode 100644
index 00000000000..620bdf09331
--- /dev/null
+++ b/docker/test/clickbench/create.sql
@@ -0,0 +1,112 @@
+ATTACH TABLE hits UUID 'c449dfbf-ba06-4d13-abec-8396559eb955'
+(
+    WatchID BIGINT NOT NULL,
+    JavaEnable SMALLINT NOT NULL,
+    Title TEXT NOT NULL,
+    GoodEvent SMALLINT NOT NULL,
+    EventTime TIMESTAMP NOT NULL,
+    EventDate Date NOT NULL,
+    CounterID INTEGER NOT NULL,
+    ClientIP INTEGER NOT NULL,
+    RegionID INTEGER NOT NULL,
+    UserID BIGINT NOT NULL,
+    CounterClass SMALLINT NOT NULL,
+    OS SMALLINT NOT NULL,
+    UserAgent SMALLINT NOT NULL,
+    URL TEXT NOT NULL,
+    Referer TEXT NOT NULL,
+    IsRefresh SMALLINT NOT NULL,
+    RefererCategoryID SMALLINT NOT NULL,
+    RefererRegionID INTEGER NOT NULL,
+    URLCategoryID SMALLINT NOT NULL,
+    URLRegionID INTEGER NOT NULL,
+    ResolutionWidth SMALLINT NOT NULL,
+    ResolutionHeight SMALLINT NOT NULL,
+    ResolutionDepth SMALLINT NOT NULL,
+    FlashMajor SMALLINT NOT NULL,
+    FlashMinor SMALLINT NOT NULL,
+    FlashMinor2 TEXT NOT NULL,
+    NetMajor SMALLINT NOT NULL,
+    NetMinor SMALLINT NOT NULL,
+    UserAgentMajor SMALLINT NOT NULL,
+    UserAgentMinor VARCHAR(255) NOT NULL,
+    CookieEnable SMALLINT NOT NULL,
+    JavascriptEnable SMALLINT NOT NULL,
+    IsMobile SMALLINT NOT NULL,
+    MobilePhone SMALLINT NOT NULL,
+    MobilePhoneModel TEXT NOT NULL,
+    Params TEXT NOT NULL,
+    IPNetworkID INTEGER NOT NULL,
+    TraficSourceID SMALLINT NOT NULL,
+    SearchEngineID SMALLINT NOT NULL,
+    SearchPhrase TEXT NOT NULL,
+    AdvEngineID SMALLINT NOT NULL,
+    IsArtifical SMALLINT NOT NULL,
+    WindowClientWidth SMALLINT NOT NULL,
+    WindowClientHeight SMALLINT NOT NULL,
+    ClientTimeZone SMALLINT NOT NULL,
+    ClientEventTime TIMESTAMP NOT NULL,
+    SilverlightVersion1 SMALLINT NOT NULL,
+    SilverlightVersion2 SMALLINT NOT NULL,
+    SilverlightVersion3 INTEGER NOT NULL,
+    SilverlightVersion4 SMALLINT NOT NULL,
+    PageCharset TEXT NOT NULL,
+    CodeVersion INTEGER NOT NULL,
+    IsLink SMALLINT NOT NULL,
+    IsDownload SMALLINT NOT NULL,
+    IsNotBounce SMALLINT NOT NULL,
+    FUniqID BIGINT NOT NULL,
+    OriginalURL TEXT NOT NULL,
+    HID INTEGER NOT NULL,
+    IsOldCounter SMALLINT NOT NULL,
+    IsEvent SMALLINT NOT NULL,
+    IsParameter SMALLINT NOT NULL,
+    DontCountHits SMALLINT NOT NULL,
+    WithHash SMALLINT NOT NULL,
+    HitColor CHAR NOT NULL,
+    LocalEventTime TIMESTAMP NOT NULL,
+    Age SMALLINT NOT NULL,
+    Sex SMALLINT NOT NULL,
+    Income SMALLINT NOT NULL,
+    Interests SMALLINT NOT NULL,
+    Robotness SMALLINT NOT NULL,
+    RemoteIP INTEGER NOT NULL,
+    WindowName INTEGER NOT NULL,
+    OpenerName INTEGER NOT NULL,
+    HistoryLength SMALLINT NOT NULL,
+    BrowserLanguage TEXT NOT NULL,
+    BrowserCountry TEXT NOT NULL,
+    SocialNetwork TEXT NOT NULL,
+    SocialAction TEXT NOT NULL,
+    HTTPError SMALLINT NOT NULL,
+    SendTiming INTEGER NOT NULL,
+    DNSTiming INTEGER NOT NULL,
+    ConnectTiming INTEGER NOT NULL,
+    ResponseStartTiming INTEGER NOT NULL,
+    ResponseEndTiming INTEGER NOT NULL,
+    FetchTiming INTEGER NOT NULL,
+    SocialSourceNetworkID SMALLINT NOT NULL,
+    SocialSourcePage TEXT NOT NULL,
+    ParamPrice BIGINT NOT NULL,
+    ParamOrderID TEXT NOT NULL,
+    ParamCurrency TEXT NOT NULL,
+    ParamCurrencyID SMALLINT NOT NULL,
+    OpenstatServiceName TEXT NOT NULL,
+    OpenstatCampaignID TEXT NOT NULL,
+    OpenstatAdID TEXT NOT NULL,
+    OpenstatSourceID TEXT NOT NULL,
+    UTMSource TEXT NOT NULL,
+    UTMMedium TEXT NOT NULL,
+    UTMCampaign TEXT NOT NULL,
+    UTMContent TEXT NOT NULL,
+    UTMTerm TEXT NOT NULL,
+    FromTag TEXT NOT NULL,
+    HasGCLID SMALLINT NOT NULL,
+    RefererHash BIGINT NOT NULL,
+    URLHash BIGINT NOT NULL,
+    CLID INTEGER NOT NULL,
+    PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID)
+)
+ENGINE = MergeTree
+SETTINGS disk = disk(type = cache, path = '/dev/shm/clickhouse/', max_size = '16G',
+         disk = disk(type = web, endpoint = 'https://clickhouse-public-datasets.s3.amazonaws.com/web/'));
diff --git a/docker/test/clickbench/queries.sql b/docker/test/clickbench/queries.sql
new file mode 100644
index 00000000000..31f65fc898d
--- /dev/null
+++ b/docker/test/clickbench/queries.sql
@@ -0,0 +1,43 @@
+SELECT COUNT(*) FROM hits;
+SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
+SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
+SELECT AVG(UserID) FROM hits;
+SELECT COUNT(DISTINCT UserID) FROM hits;
+SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
+SELECT MIN(EventDate), MAX(EventDate) FROM hits;
+SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
+SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
+SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
+SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
+SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
+SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
+SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
+SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
+SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;
+SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
+SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
+SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
+SELECT UserID FROM hits WHERE UserID = 435090932899640449;
+SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
+SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
+SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
+SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
+SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
+SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
+SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
+SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
+SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
+SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits;
+SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
+SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
+SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
+SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
+SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
+SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
+SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
+SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
+SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
+SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
+SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
+SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
+SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000;
diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
new file mode 100755
index 00000000000..6e02a346f04
--- /dev/null
+++ b/docker/test/clickbench/run.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# shellcheck disable=SC1091
+source /setup_export_logs.sh
+
+# fail on errors, verbose and export all env variables
+set -e -x -a
+
+dpkg -i package_folder/clickhouse-common-static_*.deb
+dpkg -i package_folder/clickhouse-server_*.deb
+dpkg -i package_folder/clickhouse-client_*.deb
+
+# A directory for cache
+sudo mkdir /dev/shm/clickhouse
+sudo chown clickhouse:clickhouse /dev/shm/clickhouse
+
+sudo clickhouse start
+
+# Wait for the server to start, but not for too long.
+for _ in {1..100}
+do
+    clickhouse-client --query "SELECT 1" && break
+    sleep 1
+done
+
+setup_logs_replication
+
+# Load the data
+
+clickhouse-client --time < /create.sql
+
+# Run the queries
+
+TRIES=3
+QUERY_NUM=1
+cat /queries.sql | while read query; do
+    echo -n "["
+    for i in $(seq 1 $TRIES); do
+        RES=$(clickhouse-client --time --format Null --query "$query" --progress 0 2>&1 ||:)
+        [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
+        [[ "$i" != $TRIES ]] && echo -n ", "
+
+        echo "${QUERY_NUM},${i},${RES}" >> /test_output/test_results.tsv
+    done
+    echo "],"
+
+    QUERY_NUM=$((QUERY_NUM + 1))
+done
+
+clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'"
+
+echo -e "success\tClickBench finished" > /test_output/check_status.tsv
diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
new file mode 100644
index 00000000000..0d3452b7a64
--- /dev/null
+++ b/tests/ci/clickbench.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+
+import argparse
+import csv
+import logging
+import os
+import re
+import subprocess
+import sys
+import atexit
+from pathlib import Path
+from typing import List, Tuple
+
+from github import Github
+
+from build_download_helper import download_all_deb_packages
+from clickhouse_helper import (
+    CiLogsCredentials,
+    ClickHouseHelper,
+    prepare_tests_results_for_clickhouse,
+)
+from commit_status_helper import (
+    NotSet,
+    RerunHelper,
+    get_commit,
+    override_status,
+    post_commit_status,
+    post_commit_status_to_file,
+    update_mergeable_check,
+)
+from docker_pull_helper import DockerImage, get_image_with_version
+from download_release_packages import download_last_release
+from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
+from get_robot_token import get_best_robot_token
+from pr_info import FORCE_TESTS_LABEL, PRInfo
+from s3_helper import S3Helper
+from stopwatch import Stopwatch
+from tee_popen import TeePopen
+from upload_result_helper import upload_results
+
+NO_CHANGES_MSG = "Nothing to run"
+
+
+def get_image_name(check_name: str) -> str:
+    return "clickhouse/clickbench"
+
+
+def get_run_command(
+    check_name: str,
+    builds_path: Path,
+    result_path: Path,
+    server_log_path: Path,
+    additional_envs: List[str],
+    ci_logs_args: str,
+    image: DockerImage,
+) -> str:
+
+    envs = [f"-e {e}" for e in additional_envs]
+
+    env_str = " ".join(envs)
+
+    return (
+        f"docker run --volume={builds_path}:/package_folder "
+        f"{ci_logs_args}"
+        f"--volume={result_path}:/test_output "
+        f"--volume={server_log_path}:/var/log/clickhouse-server "
+        f"--cap-add=SYS_PTRACE {env_str} {image}"
+    )
+
+def process_results(
+    result_directory: Path,
+    server_log_path: Path,
+) -> Tuple[str, str, TestResults, List[Path]]:
+    test_results = []  # type: TestResults
+    additional_files = []
+    # Just upload all files from result_directory.
+    # If task provides processed results, then it's responsible for content of result_directory.
+    if result_directory.exists():
+        additional_files = [p for p in result_directory.iterdir() if p.is_file()]
+
+    if server_log_path.exists():
+        additional_files = additional_files + [
+            p for p in server_log_path.iterdir() if p.is_file()
+        ]
+
+    status = []
+    status_path = result_directory / "check_status.tsv"
+    if status_path.exists():
+        logging.info("Found check_status.tsv")
+        with open(status_path, "r", encoding="utf-8") as status_file:
+            status = list(csv.reader(status_file, delimiter="\t"))
+
+    if len(status) != 1 or len(status[0]) != 2:
+        logging.info("Files in result folder %s", os.listdir(result_directory))
+        return "error", "Invalid check_status.tsv", test_results, additional_files
+    state, description = status[0][0], status[0][1]
+
+    try:
+        results_path = result_directory / "test_results.tsv"
+
+        if results_path.exists():
+            logging.info("Found test_results.tsv")
+        else:
+            logging.info("Files in result folder %s", os.listdir(result_directory))
+            return "error", "Not found test_results.tsv", test_results, additional_files
+
+    except Exception as e:
+        return (
+            "error",
+            f"Cannot parse test_results.tsv ({e})",
+            test_results,
+            additional_files,
+        )
+
+    return state, description, test_results, additional_files
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("check_name")
+    parser.add_argument("kill_timeout", type=int)
+    parser.add_argument(
+        "--post-commit-status",
+        default="commit_status",
+        choices=["commit_status", "file"],
+        help="Where to public post commit status",
+    )
+    return parser.parse_args()
+
+
+def main():
+    logging.basicConfig(level=logging.INFO)
+
+    stopwatch = Stopwatch()
+
+    temp_path = Path(TEMP_PATH)
+    temp_path.mkdir(parents=True, exist_ok=True)
+
+    repo_path = Path(REPO_COPY)
+    reports_path = Path(REPORTS_PATH)
+    post_commit_path = temp_path / "clickbench_status.tsv"
+
+    args = parse_args()
+    check_name = args.check_name
+    kill_timeout = args.kill_timeout
+
+    gh = Github(get_best_robot_token(), per_page=100)
+
+    pr_info = PRInfo()
+
+    commit = get_commit(gh, pr_info.sha)
+    atexit.register(update_mergeable_check, gh, pr_info, check_name)
+
+    rerun_helper = RerunHelper(commit, check_name)
+    if rerun_helper.is_already_finished_by_status():
+        logging.info("Check is already finished according to github status, exiting")
+        sys.exit(0)
+
+    image_name = get_image_name(check_name)
+    docker_image = get_image_with_version(reports_path, image_name)
+
+    packages_path = temp_path / "packages"
+    packages_path.mkdir(parents=True, exist_ok=True)
+
+    download_all_deb_packages(check_name, reports_path, packages_path)
+
+    server_log_path = temp_path / "server_log"
+    server_log_path.mkdir(parents=True, exist_ok=True)
+
+    result_path = temp_path / "result_path"
+    result_path.mkdir(parents=True, exist_ok=True)
+
+    run_log_path = result_path / "run.log"
+
+    additional_envs = []
+
+    ci_logs_credentials = CiLogsCredentials(temp_path / "export-logs-config.sh")
+    ci_logs_args = ci_logs_credentials.get_docker_arguments(
+        pr_info, stopwatch.start_time_str, check_name
+    )
+
+    run_command = get_run_command(
+        check_name,
+        packages_path,
+        repo_path,
+        result_path,
+        server_log_path,
+        kill_timeout,
+        additional_envs,
+        ci_logs_args,
+        docker_image,
+    )
+    logging.info("Going to run ClickBench: %s", run_command)
+
+    with TeePopen(run_command, run_log_path) as process:
+        retcode = process.wait()
+        if retcode == 0:
+            logging.info("Run successfully")
+        else:
+            logging.info("Run failed")
+
+    try:
+        subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
+    except subprocess.CalledProcessError:
+        logging.warning("Failed to change files owner in %s, ignoring it", temp_path)
+
+    ci_logs_credentials.clean_ci_logs_from_credentials(run_log_path)
+    s3_helper = S3Helper()
+
+    state, description, test_results, additional_logs = process_results(
+        result_path, server_log_path
+    )
+    state = override_status(state, check_name)
+
+    ch_helper = ClickHouseHelper()
+
+    report_url = upload_results(
+        s3_helper,
+        pr_info.number,
+        pr_info.sha,
+        test_results,
+        [run_log_path] + additional_logs,
+        check_name,
+    )
+
+    print(f"::notice:: {check_name} Report url: {report_url}")
+    if args.post_commit_status == "commit_status":
+        post_commit_status(
+            commit, state, report_url, description, check_name_with_group, pr_info
+        )
+    elif args.post_commit_status == "file":
+        post_commit_status_to_file(
+            post_commit_path,
+            description,
+            state,
+            report_url,
+        )
+    else:
+        raise Exception(
+            f'Unknown post_commit_status option "{args.post_commit_status}"'
+        )
+
+    prepared_events = prepare_tests_results_for_clickhouse(
+        pr_info,
+        test_results,
+        state,
+        stopwatch.duration_seconds,
+        stopwatch.start_time_str,
+        report_url,
+    )
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if state != "success":
+        if FORCE_TESTS_LABEL in pr_info.labels:
+            print(f"'{FORCE_TESTS_LABEL}' enabled, will report success")
+        else:
+            sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py
index ee046d6dfef..f5c7342d6f4 100644
--- a/tests/ci/fast_test_check.py
+++ b/tests/ci/fast_test_check.py
@@ -72,7 +72,7 @@ def process_results(result_directory: Path) -> Tuple[str, str, TestResults]:
     status = []
     status_path = result_directory / "check_status.tsv"
     if status_path.exists():
-        logging.info("Found test_results.tsv")
+        logging.info("Found check_status.tsv")
         with open(status_path, "r", encoding="utf-8") as status_file:
             status = list(csv.reader(status_file, delimiter="\t"))
     if len(status) != 1 or len(status[0]) != 2:
diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py
index f85c44f617d..c8b3e42eed4 100644
--- a/tests/ci/functional_test_check.py
+++ b/tests/ci/functional_test_check.py
@@ -169,7 +169,7 @@ def process_results(
     status = []
     status_path = result_directory / "check_status.tsv"
     if status_path.exists():
-        logging.info("Found test_results.tsv")
+        logging.info("Found check_status.tsv")
         with open(status_path, "r", encoding="utf-8") as status_file:
             status = list(csv.reader(status_file, delimiter="\t"))
 
diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py
index 8535d71c5c4..b22aa08354c 100644
--- a/tests/ci/integration_test_check.py
+++ b/tests/ci/integration_test_check.py
@@ -118,7 +118,7 @@ def process_results(
     status = []
     status_path = result_directory / "check_status.tsv"
     if status_path.exists():
-        logging.info("Found test_results.tsv")
+        logging.info("Found check_status.tsv")
         with open(status_path, "r", encoding="utf-8") as status_file:
             status = list(csv.reader(status_file, delimiter="\t"))
 
diff --git a/tests/ci/test_docker.py b/tests/ci/test_docker.py
index 8aab50ed082..f15fd1ae63f 100644
--- a/tests/ci/test_docker.py
+++ b/tests/ci/test_docker.py
@@ -64,6 +64,12 @@ class TestDockerImageCheck(unittest.TestCase):
                     False,
                     "clickhouse/test-base",  # type: ignore
                 ),
+                di.DockerImage(
+                    "docker/test/clickbench",
+                    "clickhouse/clickbench",
+                    False,
+                    "clickhouse/test-base",  # type: ignore
+                ),
                 di.DockerImage(
                     "docker/test/keeper-jepsen",
                     "clickhouse/keeper-jepsen-test",
diff --git a/tests/ci/tests/docker_images_for_tests.json b/tests/ci/tests/docker_images_for_tests.json
index 70db8760561..19e4b94bcdf 100644
--- a/tests/ci/tests/docker_images_for_tests.json
+++ b/tests/ci/tests/docker_images_for_tests.json
@@ -120,6 +120,7 @@
             "docker/test/integration/base",
             "docker/test/fuzzer",
             "docker/test/keeper-jepsen",
+            "docker/test/clickbench",
             "docker/test/sqltest"
          ]
     },
@@ -131,6 +132,10 @@
         "name": "clickhouse/sqlancer-test",
         "dependent": []
     },
+    "docker/test/clickbench": {
+        "name": "clickhouse/clickbench",
+        "dependent": []
+    },
     "docker/test/keeper-jepsen": {
         "name": "clickhouse/keeper-jepsen-test",
         "dependent": []

From f8f42d82ddcdb3f48f4ceab0154486ad4b80a8b0 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Sat, 9 Dec 2023 22:52:58 +0000
Subject: [PATCH 088/213] Automatic style fix

---
 tests/ci/clickbench.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 0d3452b7a64..2b320d69e7d 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -54,7 +54,6 @@ def get_run_command(
     ci_logs_args: str,
     image: DockerImage,
 ) -> str:
-
     envs = [f"-e {e}" for e in additional_envs]
 
     env_str = " ".join(envs)
@@ -67,6 +66,7 @@ def get_run_command(
         f"--cap-add=SYS_PTRACE {env_str} {image}"
     )
 
+
 def process_results(
     result_directory: Path,
     server_log_path: Path,

From 17772dc41c3098c5a8411e80cd970e6d73c45ce2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 00:03:33 +0100
Subject: [PATCH 089/213] Recommendation from @felixoid

---
 tests/ci/tests/docker_images_for_tests.json | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/ci/tests/docker_images_for_tests.json b/tests/ci/tests/docker_images_for_tests.json
index 19e4b94bcdf..70db8760561 100644
--- a/tests/ci/tests/docker_images_for_tests.json
+++ b/tests/ci/tests/docker_images_for_tests.json
@@ -120,7 +120,6 @@
             "docker/test/integration/base",
             "docker/test/fuzzer",
             "docker/test/keeper-jepsen",
-            "docker/test/clickbench",
             "docker/test/sqltest"
          ]
     },
@@ -132,10 +131,6 @@
         "name": "clickhouse/sqlancer-test",
         "dependent": []
     },
-    "docker/test/clickbench": {
-        "name": "clickhouse/clickbench",
-        "dependent": []
-    },
     "docker/test/keeper-jepsen": {
         "name": "clickhouse/keeper-jepsen-test",
         "dependent": []

From f2875068be79c2b1838aad693e338e0bbf8b3b96 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 00:19:30 +0100
Subject: [PATCH 090/213] Style

---
 docker/test/clickbench/run.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index 6e02a346f04..eadaa2b5939 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -33,19 +33,19 @@ clickhouse-client --time < /create.sql
 
 TRIES=3
 QUERY_NUM=1
-cat /queries.sql | while read query; do
+while read query; do
     echo -n "["
     for i in $(seq 1 $TRIES); do
         RES=$(clickhouse-client --time --format Null --query "$query" --progress 0 2>&1 ||:)
-        [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
-        [[ "$i" != $TRIES ]] && echo -n ", "
+        echo -n "${RES}"
+        [[ "$i" != "$TRIES" ]] && echo -n ", "
 
         echo "${QUERY_NUM},${i},${RES}" >> /test_output/test_results.tsv
     done
     echo "],"
 
     QUERY_NUM=$((QUERY_NUM + 1))
-done
+done < /queries.sql
 
 clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'"
 

From 2602a7ba817b23668f46aade018601da3214669b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 00:22:19 +0100
Subject: [PATCH 091/213] Python

---
 tests/ci/clickbench.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 2b320d69e7d..5f41d3d497f 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -41,12 +41,11 @@ from upload_result_helper import upload_results
 NO_CHANGES_MSG = "Nothing to run"
 
 
-def get_image_name(check_name: str) -> str:
+def get_image_name() -> str:
     return "clickhouse/clickbench"
 
 
 def get_run_command(
-    check_name: str,
     builds_path: Path,
     result_path: Path,
     server_log_path: Path,
@@ -70,8 +69,8 @@ def get_run_command(
 def process_results(
     result_directory: Path,
     server_log_path: Path,
-) -> Tuple[str, str, TestResults, List[Path]]:
-    test_results = []  # type: TestResults
+) -> Tuple[str, str, List[str], List[Path]]:
+    test_results = []
     additional_files = []
     # Just upload all files from result_directory.
     # If task provides processed results, then it's responsible for content of result_directory.
@@ -156,7 +155,7 @@ def main():
         logging.info("Check is already finished according to github status, exiting")
         sys.exit(0)
 
-    image_name = get_image_name(check_name)
+    image_name = get_image_name()
     docker_image = get_image_with_version(reports_path, image_name)
 
     packages_path = temp_path / "packages"
@@ -180,9 +179,7 @@ def main():
     )
 
     run_command = get_run_command(
-        check_name,
         packages_path,
-        repo_path,
         result_path,
         server_log_path,
         kill_timeout,

From ea31c8471f11ccc4c753ae98643fbac3715c2b29 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 00:26:44 +0100
Subject: [PATCH 092/213] Python

---
 tests/ci/clickbench.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 5f41d3d497f..82209acd2c4 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -4,7 +4,6 @@ import argparse
 import csv
 import logging
 import os
-import re
 import subprocess
 import sys
 import atexit
@@ -20,7 +19,6 @@ from clickhouse_helper import (
     prepare_tests_results_for_clickhouse,
 )
 from commit_status_helper import (
-    NotSet,
     RerunHelper,
     get_commit,
     override_status,
@@ -29,7 +27,6 @@ from commit_status_helper import (
     update_mergeable_check,
 )
 from docker_pull_helper import DockerImage, get_image_with_version
-from download_release_packages import download_last_release
 from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
 from get_robot_token import get_best_robot_token
 from pr_info import FORCE_TESTS_LABEL, PRInfo
@@ -38,8 +35,6 @@ from stopwatch import Stopwatch
 from tee_popen import TeePopen
 from upload_result_helper import upload_results
 
-NO_CHANGES_MSG = "Nothing to run"
-
 
 def get_image_name() -> str:
     return "clickhouse/clickbench"

From 9a3860581c4a8e3b01026c6355cb9ea6110a7ea1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 00:27:11 +0100
Subject: [PATCH 093/213] YAML

---
 .github/workflows/pull_request.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 3db382b1fd6..5bb62b04c32 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -715,7 +715,7 @@ jobs:
         cd "$REPO_COPY/tests/ci"
         python3 clickbench.py "$CHECK_NAME" "$KILL_TIMEOUT"
   ClickBenchAarch64:
-    needs: [ BuilderDebAarch64 ]
+    needs: [BuilderDebAarch64]
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: ClickBench (aarch64)

From af48cb97322541495e496bfc1a346186584d936e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 00:35:04 +0100
Subject: [PATCH 094/213] Python

---
 tests/ci/test_docker.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tests/ci/test_docker.py b/tests/ci/test_docker.py
index f15fd1ae63f..8aab50ed082 100644
--- a/tests/ci/test_docker.py
+++ b/tests/ci/test_docker.py
@@ -64,12 +64,6 @@ class TestDockerImageCheck(unittest.TestCase):
                     False,
                     "clickhouse/test-base",  # type: ignore
                 ),
-                di.DockerImage(
-                    "docker/test/clickbench",
-                    "clickhouse/clickbench",
-                    False,
-                    "clickhouse/test-base",  # type: ignore
-                ),
                 di.DockerImage(
                     "docker/test/keeper-jepsen",
                     "clickhouse/keeper-jepsen-test",

From ab2f60cceffa588e9f46359e142273aefdddba95 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Sun, 10 Dec 2023 02:45:51 +0100
Subject: [PATCH 095/213] sleep() function now can be cancelled with KILL
 QUERY.

---
 src/Functions/sleep.h                         | 25 ++++++++++--
 .../02932_kill_query_sleep.reference          |  2 +
 .../0_stateless/02932_kill_query_sleep.sh     | 38 +++++++++++++++++++
 3 files changed, 62 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/02932_kill_query_sleep.reference
 create mode 100755 tests/queries/0_stateless/02932_kill_query_sleep.sh

diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h
index 160a8a2afe2..36fa14cd36e 100644
--- a/src/Functions/sleep.h
+++ b/src/Functions/sleep.h
@@ -10,6 +10,7 @@
 #include <base/sleep.h>
 #include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/ProcessList.h>
 
 
 namespace ProfileEvents
@@ -27,6 +28,7 @@ namespace ErrorCodes
     extern const int TOO_SLOW;
     extern const int ILLEGAL_COLUMN;
     extern const int BAD_ARGUMENTS;
+    extern const int QUERY_WAS_CANCELLED;
 }
 
 /** sleep(seconds) - the specified number of seconds sleeps each columns.
@@ -43,15 +45,20 @@ class FunctionSleep : public IFunction
 {
 private:
     UInt64 max_microseconds;
+    QueryStatusPtr query_status;
+
 public:
     static constexpr auto name = variant == FunctionSleepVariant::PerBlock ? "sleep" : "sleepEachRow";
     static FunctionPtr create(ContextPtr context)
     {
-        return std::make_shared<FunctionSleep<variant>>(context->getSettingsRef().function_sleep_max_microseconds_per_block);
+        return std::make_shared<FunctionSleep<variant>>(
+            context->getSettingsRef().function_sleep_max_microseconds_per_block,
+            context->getProcessListElementSafe());
     }
 
-    FunctionSleep(UInt64 max_microseconds_)
+    FunctionSleep(UInt64 max_microseconds_, QueryStatusPtr query_status_)
         : max_microseconds(std::min(max_microseconds_, static_cast<UInt64>(std::numeric_limits<UInt32>::max())))
+        , query_status(query_status_)
     {
     }
 
@@ -128,7 +135,19 @@ public:
                         "The maximum sleep time is {} microseconds. Requested: {} microseconds per block (of size {})",
                         max_microseconds, microseconds, size);
 
-                sleepForMicroseconds(microseconds);
+                /// Count down a copy: `microseconds` itself is reported to ProfileEvents after the loop.
+                for (UInt64 remaining = microseconds; remaining != 0;)
+                {
+                    /// With a query to watch, sleep at most 1 second at a time so that KILL QUERY is noticed promptly.
+                    const UInt64 slice = query_status ? std::min<UInt64>(remaining, 1000000) : remaining;
+
+                    sleepForMicroseconds(slice);
+                    remaining -= slice;
+
+                    if (query_status && query_status->isKilled())
+                        throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled");
+                }
+
                 ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count);
                 ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds);
             }
diff --git a/tests/queries/0_stateless/02932_kill_query_sleep.reference b/tests/queries/0_stateless/02932_kill_query_sleep.reference
new file mode 100644
index 00000000000..9c19635a83f
--- /dev/null
+++ b/tests/queries/0_stateless/02932_kill_query_sleep.reference
@@ -0,0 +1,2 @@
+Cancelling query
+QUERY_WAS_CANCELLED
diff --git a/tests/queries/0_stateless/02932_kill_query_sleep.sh b/tests/queries/0_stateless/02932_kill_query_sleep.sh
new file mode 100755
index 00000000000..81bb892bc15
--- /dev/null
+++ b/tests/queries/0_stateless/02932_kill_query_sleep.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+
+function wait_query_started()
+{
+    local query_id="$1"
+    while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.query_log WHERE query_id='$query_id'") == 0 ]]; do
+        sleep 0.1;
+        $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS;"
+    done
+}
+
+function kill_query()
+{
+    local query_id="$1"
+    $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id='$query_id'" >/dev/null
+    while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.processes WHERE query_id='$query_id'") != 0 ]]; do sleep 0.1; done
+}
+
+
+sleep_query_id="sleep_query_id_02932_kill_query_sleep_${CLICKHOUSE_DATABASE}_$RANDOM"
+
+# This sleep query wants to sleep for 1000 seconds (which is too long).
+# We're going to cancel this query later.
+sleep_query="SELECT sleep(1000)"
+
+$CLICKHOUSE_CLIENT --query_id="$sleep_query_id" --function_sleep_max_microseconds_per_block="1000000000" --query "$sleep_query" >/dev/null 2>&1 &
+wait_query_started "$sleep_query_id"
+
+echo "Cancelling query"
+kill_query "$sleep_query_id"
+
+$CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS;"
+$CLICKHOUSE_CLIENT --query "SELECT exception FROM system.query_log WHERE query_id='$sleep_query_id'" | grep -oF "QUERY_WAS_CANCELLED"

From 0eb7a41babe9148dc2b246d25717f4008900661e Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Thu, 7 Dec 2023 13:54:28 +0300
Subject: [PATCH 096/213] CHJIT add assembly printer

---
 src/Interpreters/JIT/CHJIT.cpp | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp
index 63fe4f44f5f..8db50d73193 100644
--- a/src/Interpreters/JIT/CHJIT.cpp
+++ b/src/Interpreters/JIT/CHJIT.cpp
@@ -244,6 +244,33 @@ private:
     }
 };
 
+#define PRINT_ASSEMBLY
+
+#ifdef PRINT_ASSEMBLY
+
+class AssemblyPrinter
+{
+public:
+    explicit AssemblyPrinter(llvm::TargetMachine &target_machine_)
+    : target_machine(target_machine_)
+    {
+    }
+
+    void print(llvm::Module & module)
+    {
+        llvm::legacy::PassManager pass_manager;
+        target_machine.Options.MCOptions.AsmVerbose = true;
+        if (target_machine.addPassesToEmitFile(pass_manager, llvm::errs(), nullptr, llvm::CodeGenFileType::CGFT_AssemblyFile))
+            throw Exception(ErrorCodes::CANNOT_COMPILE_CODE, "MachineCode cannot be printed");
+
+        pass_manager.run(module);
+    }
+private:
+    llvm::TargetMachine & target_machine;
+};
+
+#endif
+
 /** MemoryManager for module.
   * Keep total allocated size during RuntimeDyld linker execution.
   */
@@ -375,6 +402,11 @@ CHJIT::CompiledModule CHJIT::compileModule(std::unique_ptr<llvm::Module> module)
 {
     runOptimizationPassesOnModule(*module);
 
+#ifdef PRINT_ASSEMBLY
+    AssemblyPrinter assembly_printer(*machine);
+    assembly_printer.print(*module);
+#endif
+
     auto buffer = compiler->compile(*module);
 
     llvm::Expected<std::unique_ptr<llvm::object::ObjectFile>> object = llvm::object::ObjectFile::createObjectFile(*buffer);

From cdf6da88fe6e5dcbc0341e535a180d7169d25b59 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sun, 10 Dec 2023 16:09:55 +0300
Subject: [PATCH 097/213] Fixed code review issues

---
 src/Interpreters/JIT/CHJIT.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp
index 8db50d73193..fc1decc8482 100644
--- a/src/Interpreters/JIT/CHJIT.cpp
+++ b/src/Interpreters/JIT/CHJIT.cpp
@@ -244,8 +244,6 @@ private:
     }
 };
 
-#define PRINT_ASSEMBLY
-
 #ifdef PRINT_ASSEMBLY
 
 class AssemblyPrinter

From bb501a0e136c41c5ac8573c010822ecaa30d136d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 19:06:38 +0100
Subject: [PATCH 098/213] Shellcheck

---
 docker/test/clickbench/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index eadaa2b5939..8244497a36c 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -33,7 +33,7 @@ clickhouse-client --time < /create.sql
 
 TRIES=3
 QUERY_NUM=1
-while read query; do
+while read -r query; do
     echo -n "["
     for i in $(seq 1 $TRIES); do
         RES=$(clickhouse-client --time --format Null --query "$query" --progress 0 2>&1 ||:)

From 11dda11f22f252d4996171bcc2887a6664878588 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 19:08:50 +0100
Subject: [PATCH 099/213] Python

---
 tests/ci/clickbench.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 82209acd2c4..661a6ba50dc 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -130,7 +130,6 @@ def main():
     temp_path = Path(TEMP_PATH)
     temp_path.mkdir(parents=True, exist_ok=True)
 
-    repo_path = Path(REPO_COPY)
     reports_path = Path(REPORTS_PATH)
     post_commit_path = temp_path / "clickbench_status.tsv"
 
@@ -177,7 +176,6 @@ def main():
         packages_path,
         result_path,
         server_log_path,
-        kill_timeout,
         additional_envs,
         ci_logs_args,
         docker_image,
@@ -218,7 +216,7 @@ def main():
     print(f"::notice:: {check_name} Report url: {report_url}")
     if args.post_commit_status == "commit_status":
         post_commit_status(
-            commit, state, report_url, description, check_name_with_group, pr_info
+            commit, state, report_url, description, check_name, pr_info
         )
     elif args.post_commit_status == "file":
         post_commit_status_to_file(
@@ -239,6 +237,7 @@ def main():
         stopwatch.duration_seconds,
         stopwatch.start_time_str,
         report_url,
+        check_name
     )
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
 

From 99c8430c3e3b25ef4098e7006ec751fa34cabdd9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 19:10:44 +0100
Subject: [PATCH 100/213] mypy

---
 tests/ci/clickbench.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 661a6ba50dc..657695a57a1 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -65,8 +65,8 @@ def process_results(
     result_directory: Path,
     server_log_path: Path,
 ) -> Tuple[str, str, List[str], List[Path]]:
-    test_results = []
-    additional_files = []
+    test_results = []  # type: List[str]
+    additional_files = []  # type: List[str]
     # Just upload all files from result_directory.
     # If task provides processed results, then it's responsible for content of result_directory.
     if result_directory.exists():

From c1e387d682df837782ea1b5dde828e38f6d18795 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Sun, 10 Dec 2023 18:32:09 +0000
Subject: [PATCH 101/213] Automatic style fix

---
 tests/ci/clickbench.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 657695a57a1..c2efcacefad 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -215,9 +215,7 @@ def main():
 
     print(f"::notice:: {check_name} Report url: {report_url}")
     if args.post_commit_status == "commit_status":
-        post_commit_status(
-            commit, state, report_url, description, check_name, pr_info
-        )
+        post_commit_status(commit, state, report_url, description, check_name, pr_info)
     elif args.post_commit_status == "file":
         post_commit_status_to_file(
             post_commit_path,
@@ -237,7 +235,7 @@ def main():
         stopwatch.duration_seconds,
         stopwatch.start_time_str,
         report_url,
-        check_name
+        check_name,
     )
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
 

From 75df8db29870de080c60684889f9a7440492d248 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 19:52:52 +0100
Subject: [PATCH 102/213] Python

---
 tests/ci/clickbench.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 657695a57a1..8e6b9a621fe 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -27,7 +27,7 @@ from commit_status_helper import (
     update_mergeable_check,
 )
 from docker_pull_helper import DockerImage, get_image_with_version
-from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
+from env_helper import TEMP_PATH, REPORTS_PATH
 from get_robot_token import get_best_robot_token
 from pr_info import FORCE_TESTS_LABEL, PRInfo
 from s3_helper import S3Helper
@@ -135,7 +135,6 @@ def main():
 
     args = parse_args()
     check_name = args.check_name
-    kill_timeout = args.kill_timeout
 
     gh = Github(get_best_robot_token(), per_page=100)
 

From ce894c28f3c22ac52f359442c6f583608f771503 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 19:54:59 +0100
Subject: [PATCH 103/213] mypy

---
 tests/ci/clickbench.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 8e6b9a621fe..870d83ecded 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -65,8 +65,8 @@ def process_results(
     result_directory: Path,
     server_log_path: Path,
 ) -> Tuple[str, str, List[str], List[Path]]:
-    test_results = []  # type: List[str]
-    additional_files = []  # type: List[str]
+    test_results = []  # type: List[TestResult]
+    additional_files = []  # type: List[Path]
     # Just upload all files from result_directory.
     # If task provides processed results, then it's responsible for content of result_directory.
     if result_directory.exists():
@@ -164,7 +164,7 @@ def main():
 
     run_log_path = result_path / "run.log"
 
-    additional_envs = []
+    additional_envs = []  # type: List[str]
 
     ci_logs_credentials = CiLogsCredentials(temp_path / "export-logs-config.sh")
     ci_logs_args = ci_logs_credentials.get_docker_arguments(

From 6b55c16b4ed40864aa0577fa61a9a6a41c12912d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 20:49:03 +0100
Subject: [PATCH 104/213] mypy

---
 tests/ci/clickbench.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 3cce429a4e3..9c700adb398 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -34,6 +34,7 @@ from s3_helper import S3Helper
 from stopwatch import Stopwatch
 from tee_popen import TeePopen
 from upload_result_helper import upload_results
+from report import TestResults
 
 
 def get_image_name() -> str:
@@ -64,8 +65,8 @@ def get_run_command(
 def process_results(
     result_directory: Path,
     server_log_path: Path,
-) -> Tuple[str, str, List[str], List[Path]]:
-    test_results = []  # type: List[TestResult]
+) -> Tuple[str, str, TestResults, List[Path]]:
+    test_results = []  # type: TestResults
     additional_files = []  # type: List[Path]
     # Just upload all files from result_directory.
     # If task provides processed results, then it's responsible for content of result_directory.

From 76ae90c5ae5f102a62c09731a311a6adaed4169f Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Sun, 10 Dec 2023 20:25:08 +0000
Subject: [PATCH 105/213] test added

---
 .../0_stateless/02935_ipv6_bit_operations.reference        | 1 +
 tests/queries/0_stateless/02935_ipv6_bit_operations.sql    | 7 +++++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/queries/0_stateless/02935_ipv6_bit_operations.reference
 create mode 100644 tests/queries/0_stateless/02935_ipv6_bit_operations.sql

diff --git a/tests/queries/0_stateless/02935_ipv6_bit_operations.reference b/tests/queries/0_stateless/02935_ipv6_bit_operations.reference
new file mode 100644
index 00000000000..22d5cda0a39
--- /dev/null
+++ b/tests/queries/0_stateless/02935_ipv6_bit_operations.reference
@@ -0,0 +1 @@
+11111111111111110000000000000000111111111111111100000000000000001111111111111111000000000000000011111111111111110000000000000000	00000000000000001111111111111111000000000000000011111111111111110000000000000000111111111111111100000000000000001111111111111111	10101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010	01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101	10101010101010100000000000000000101010101010101000000000000000001010101010101010000000000000000010101010101010100000000000000000	10101010101010100000000000000000101010101010101000000000000000001010101010101010000000000000000010101010101010100000000000000000	1010101010101010000000000000000010101010101010100000000000000000101010101010101000000000000000001010101010101010	1010101010101010000000000000000010101010101010100000000000000000101010101010101000000000000000001010101010101010	01010101010101010000000000000000010101010101010100000000000000000101010101010101000000000000000001010101010101010000000000000000	01010101010101010000000000000000010101010101010100000000000000000101010101010101000000000000000001010101010101010000000000000000	0101010101010101000000000000000001010101010101010000000000000000010101010101010100000000000000000101010101010101	0101010101010101000000000000000001010101010101010000000000000000010101010101010100000000000000000101010101010101	11111111111111111010101010101010111111111111111110101010101010101111111111111111101010101010101011111111111111111010101010101010	11111111111111111010101010101010111111111111111110101010101010101111111111111111101010101010101011111111111111111010101010101010	10101010101010101111111111111111101010101010101011111111111111111010101010101010111111111111111110101010101010101111111111111111	
10101010101010101111111111111111101010101010101011111111111111111010101010101010111111111111111110101010101010101111111111111111	11111111111111110101010101010101111111111111111101010101010101011111111111111111010101010101010111111111111111110101010101010101	11111111111111110101010101010101111111111111111101010101010101011111111111111111010101010101010111111111111111110101010101010101	01010101010101011111111111111111010101010101010111111111111111110101010101010101111111111111111101010101010101011111111111111111	01010101010101011111111111111111010101010101010111111111111111110101010101010101111111111111111101010101010101011111111111111111
diff --git a/tests/queries/0_stateless/02935_ipv6_bit_operations.sql b/tests/queries/0_stateless/02935_ipv6_bit_operations.sql
new file mode 100644
index 00000000000..6598c2ac539
--- /dev/null
+++ b/tests/queries/0_stateless/02935_ipv6_bit_operations.sql
@@ -0,0 +1,7 @@
+WITH toIPv6('FFFF:0000:FFFF:0000:FFFF:0000:FFFF:0000') AS ip1, toIPv6('0000:FFFF:0000:FFFF:0000:FFFF:0000:FFFF') AS ip2,
+     CAST('226854911280625642308916404954512140970', 'UInt128') AS n1, CAST('113427455640312821154458202477256070485', 'UInt128') AS n2
+SELECT bin(ip1), bin(ip2), bin(n1), bin(n2),
+       bin(bitAnd(ip1, n1)), bin(bitAnd(n1, ip1)), bin(bitAnd(ip2, n1)), bin(bitAnd(n1, ip2)),
+       bin(bitAnd(ip1, n2)), bin(bitAnd(n2, ip1)), bin(bitAnd(ip2, n2)), bin(bitAnd(n2, ip2)),
+       bin(bitOr(ip1, n1)), bin(bitOr(n1, ip1)), bin(bitOr(ip2, n1)), bin(bitOr(n1, ip2)),
+       bin(bitOr(ip1, n2)), bin(bitOr(n2, ip1)), bin(bitOr(ip2, n2)), bin(bitOr(n2, ip2));

From 4b13a6d08f35daf09ee1f6ef951f260f68523ea5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 22:02:05 +0100
Subject: [PATCH 106/213] Fix CI

---
 tests/ci/ci_config.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index aa3aa5654aa..e9f75d66b2e 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -329,6 +329,8 @@ CI_CONFIG = CiConfig(
         "SQLancer (debug)": TestConfig("package_debug"),
         "Sqllogic test (release)": TestConfig("package_release"),
         "SQLTest": TestConfig("package_release"),
+        "ClickBench (amd64)": TestConfig("package_release"),
+        "ClickBench (aarch64)": TestConfig("package_aarch64"),
         "libFuzzer tests": TestConfig("fuzzers"),
     },
 )
@@ -507,6 +509,11 @@ CHECK_DESCRIPTIONS = [
         "successfully startup without any errors, crashes or sanitizer asserts",
         lambda x: x.startswith("Upgrade check ("),
     ),
+    CheckDescription(
+        "ClickBench",
+        "Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table",
+        lambda x: x.startswith("ClickBench"),
+    ),
     CheckDescription(
         "Falback for unknown",
         "There's no description for the check yet, please add it to "

From 7df7793724c7ff30c0f6c0d475eeda121d9c3cab Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 10 Dec 2023 23:53:22 +0100
Subject: [PATCH 107/213] Docker

---
 docker/test/clickbench/run.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index 8244497a36c..82eb06dffcd 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -11,10 +11,10 @@ dpkg -i package_folder/clickhouse-server_*.deb
 dpkg -i package_folder/clickhouse-client_*.deb
 
 # A directory for cache
-sudo mkdir /dev/shm/clickhouse
-sudo chown clickhouse:clickhouse /dev/shm/clickhouse
+mkdir /dev/shm/clickhouse
+chown clickhouse:clickhouse /dev/shm/clickhouse
 
-sudo clickhouse start
+clickhouse start
 
 # Wait for the server to start, but not for too long.
 for _ in {1..100}

From 740ceea108f61d8cda725d6a1a5c0f26b4b399cd Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 11 Dec 2023 03:10:17 +0100
Subject: [PATCH 108/213] Docker

---
 docker/test/base/Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile
index b55baa0e0fc..b48017fdacc 100644
--- a/docker/test/base/Dockerfile
+++ b/docker/test/base/Dockerfile
@@ -12,6 +12,7 @@ RUN apt-get update \
         ripgrep \
         zstd \
         locales \
+        sudo \
         --yes --no-install-recommends
 
 # Sanitizer options for services (clickhouse-server)

From 89ba4a845232849b7bd2dc807b63328d853f0451 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin@clickhouse.com>
Date: Thu, 7 Dec 2023 07:28:41 +0100
Subject: [PATCH 109/213] Add HTTP readiness endpoint

---
 programs/keeper/Keeper.cpp                    | 23 +++++
 programs/server/Server.cpp                    | 29 ++++++
 src/Server/HTTPHandlerFactory.h               |  7 ++
 src/Server/KeeperReadinessHandler.cpp         | 95 +++++++++++++++++++
 src/Server/KeeperReadinessHandler.h           | 31 ++++++
 tests/integration/helpers/keeper_utils.py     |  5 +
 .../test_keeper_http_control/__init__.py      |  0
 .../configs/enable_keeper1.xml                | 37 ++++++++
 .../configs/enable_keeper2.xml                | 37 ++++++++
 .../configs/enable_keeper3.xml                | 37 ++++++++
 .../test_keeper_http_control/test.py          | 62 ++++++++++++
 11 files changed, 363 insertions(+)
 create mode 100644 src/Server/KeeperReadinessHandler.cpp
 create mode 100644 src/Server/KeeperReadinessHandler.h
 create mode 100644 tests/integration/test_keeper_http_control/__init__.py
 create mode 100644 tests/integration/test_keeper_http_control/configs/enable_keeper1.xml
 create mode 100644 tests/integration/test_keeper_http_control/configs/enable_keeper2.xml
 create mode 100644 tests/integration/test_keeper_http_control/configs/enable_keeper3.xml
 create mode 100644 tests/integration/test_keeper_http_control/test.py

diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index 1acf7e39b04..4f45b09b682 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -466,6 +466,29 @@ try
                 std::make_unique<HTTPServer>(
                     std::move(my_http_context), createPrometheusMainHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
         });
+
+        /// HTTP control endpoints (readiness probe). The server is given the HTTPServerParams configured inside the lambda.
+        port_name = "keeper_server.http_control.port";
+        createServer(listen_host, port_name, listen_try, [&](UInt16 port) mutable
+        {
+            auto my_http_context = httpContext();
+            Poco::Timespan my_keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0);
+            Poco::Net::HTTPServerParams::Ptr my_http_params = new Poco::Net::HTTPServerParams;
+            my_http_params->setTimeout(my_http_context->getReceiveTimeout());
+            my_http_params->setKeepAliveTimeout(my_keep_alive_timeout);
+
+            Poco::Net::ServerSocket socket;
+            auto address = socketBindListen(socket, listen_host, port);
+            socket.setReceiveTimeout(my_http_context->getReceiveTimeout());
+            socket.setSendTimeout(my_http_context->getSendTimeout());
+            servers->emplace_back(
+                listen_host,
+                port_name,
+                "HTTP Control: http://" + address.toString(),
+                std::make_unique<HTTPServer>(
+                    std::move(my_http_context), createKeeperHTTPControlMainHandlerFactory(*this, config_getter(), global_context->getKeeperDispatcher(), "KeeperHTTPControlHandler-factory"), server_pool, socket, my_http_params)
+                    );
+        });
     }
 
     for (auto & server : *servers)
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 8076d108083..ee3c7a72edc 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1551,6 +1551,35 @@ try
                     throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
 #endif
                 });
+
+            /// HTTP control endpoints
+            port_name = "keeper_server.http_control.port";
+            createServer(config(), listen_host, port_name, listen_try, /* start_server: */ false,
+            servers_to_start_before_tables,
+            [&](UInt16 port) -> ProtocolServerAdapter
+            {
+                auto http_context = httpContext();
+                Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0);
+                Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
+                http_params->setTimeout(http_context->getReceiveTimeout());
+                http_params->setKeepAliveTimeout(keep_alive_timeout);
+
+                Poco::Net::ServerSocket socket;
+                auto address = socketBindListen(config(), socket, listen_host, port);
+                socket.setReceiveTimeout(http_context->getReceiveTimeout());
+                socket.setSendTimeout(http_context->getSendTimeout());
+                return ProtocolServerAdapter(
+                    listen_host,
+                    port_name,
+                    "HTTP Control: http://" + address.toString(),
+                    std::make_unique<HTTPServer>(
+                        std::move(http_context),
+                        createKeeperHTTPControlMainHandlerFactory(
+                            *this,
+                            config_getter(),
+                            global_context->getKeeperDispatcher(),
+                            "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params));
+            });
         }
 #else
         throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination.");
diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h
index 94b02e52277..fd927d480fd 100644
--- a/src/Server/HTTPHandlerFactory.h
+++ b/src/Server/HTTPHandlerFactory.h
@@ -5,6 +5,7 @@
 #include <Server/HTTP/HTTPRequestHandlerFactory.h>
 #include <Server/HTTPHandlerRequestFilter.h>
 #include <Server/HTTPRequestHandlerFactoryMain.h>
+#include <Coordination/KeeperDispatcher.h>
 #include <Common/StringUtils/StringUtils.h>
 
 #include <Poco/Util/AbstractConfiguration.h>
@@ -136,6 +137,12 @@ createPrometheusMainHandlerFactory(IServer & server,
     AsynchronousMetrics & async_metrics,
     const std::string & name);
 
+HTTPRequestHandlerFactoryPtr
+createKeeperHTTPControlMainHandlerFactory(IServer & server,
+    const Poco::Util::AbstractConfiguration & config,
+    std::shared_ptr<KeeperDispatcher> keeper_dispatcher,
+    const std::string & name);
+
 /// @param server - used in handlers to check IServer::isCancelled()
 /// @param config - not the same as server.config(), since it can be newer
 /// @param async_metrics - used for prometheus (in case of prometheus.asynchronous_metrics=true)
diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp
new file mode 100644
index 00000000000..f69f6cb377a
--- /dev/null
+++ b/src/Server/KeeperReadinessHandler.cpp
@@ -0,0 +1,95 @@
+#include <memory>
+
+#include <Server/KeeperReadinessHandler.h>
+#include <Databases/IDatabase.h>
+#include <IO/HTTPCommon.h>
+#include <Interpreters/Context.h>
+#include <Server/HTTP/HTMLForm.h>
+#include <Server/HTTPHandlerFactory.h>
+#include <Server/HTTPHandlerRequestFilter.h>
+#include <Server/IServer.h>
+#include <Storages/StorageReplicatedMergeTree.h>
+#include <Common/typeid_cast.h>
+#include <Coordination/KeeperDispatcher.h>
+#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
+
+#include <Poco/Net/HTTPRequestHandlerFactory.h>
+#include <Poco/Net/HTTPServerRequest.h>
+#include <Poco/Net/HTTPServerResponse.h>
+#include <Poco/JSON/JSON.h>
+#include <Poco/JSON/Object.h>
+#include <Poco/JSON/Stringifier.h>
+
+namespace DB
+{
+
+void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTPServerResponse & response)
+{
+    try
+    {
+        auto is_leader = keeper_dispatcher->isLeader();
+        auto is_follower = keeper_dispatcher->isFollower() && keeper_dispatcher->hasLeader();
+
+        auto status = is_leader || is_follower;
+
+        Poco::JSON::Object json, details;
+
+        details.set("leader", is_leader);
+        details.set("follower", is_follower);
+        json.set("details", details);
+        json.set("status", status ? "ok": "fail");
+
+        std::ostringstream oss;     // STYLE_CHECK_ALLOW_STD_STRING_STREAM
+        oss.exceptions(std::ios::failbit);
+        Poco::JSON::Stringifier::stringify(json, oss);
+
+        if (!status)
+            response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_SERVICE_UNAVAILABLE);
+
+        *response.send() << oss.str();
+    }
+    catch (...)
+    {
+        tryLogCurrentException("KeeperReadinessHandler");
+
+        try
+        {
+            response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
+
+            if (!response.sent())
+            {
+                /// We have not sent anything yet and we don't even know if we need to compress response.
+                *response.send() << getCurrentExceptionMessage(false) << std::endl;
+            }
+        }
+        catch (...)
+        {
+            LOG_ERROR((&Poco::Logger::get("KeeperReadinessHandler")), "Cannot send exception to client");
+        }
+    }
+}
+
+
+HTTPRequestHandlerFactoryPtr createKeeperHTTPControlMainHandlerFactory(
+    IServer & server,
+    const Poco::Util::AbstractConfiguration & config,
+    std::shared_ptr<KeeperDispatcher> keeper_dispatcher,
+    const std::string & name)
+{
+    auto factory = std::make_shared<HTTPRequestHandlerFactoryMain>(name);
+    using Factory = HandlingRuleHTTPHandlerFactory<KeeperReadinessHandler>;
+    Factory::Creator creator = [&server, keeper_dispatcher]() -> std::unique_ptr<KeeperReadinessHandler>
+    {
+        return std::make_unique<KeeperReadinessHandler>(server, keeper_dispatcher);
+    };
+
+    auto readiness_handler = std::make_shared<Factory>(std::move(creator));
+
+    readiness_handler->attachStrictPath(config.getString("keeper_server.http_control.readiness.endpoint", "/ready"));
+    readiness_handler->allowGetAndHeadRequest();
+    factory->addHandler(readiness_handler);
+
+    return factory;
+}
+
+}
diff --git a/src/Server/KeeperReadinessHandler.h b/src/Server/KeeperReadinessHandler.h
new file mode 100644
index 00000000000..143751c5d67
--- /dev/null
+++ b/src/Server/KeeperReadinessHandler.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <memory>
+#include <Server/HTTP/HTTPRequestHandler.h>
+#include <Server/HTTP/HTMLForm.h>
+#include <Coordination/KeeperDispatcher.h>
+
+namespace DB
+{
+
+class Context;
+class IServer;
+
+class KeeperReadinessHandler : public HTTPRequestHandler, WithContext
+{
+private:
+    IServer & server;
+    std::shared_ptr<KeeperDispatcher> keeper_dispatcher;
+
+public:
+    explicit KeeperReadinessHandler(IServer & server_, std::shared_ptr<KeeperDispatcher> keeper_dispatcher_)
+        : server(server_)
+        , keeper_dispatcher(keeper_dispatcher_)
+    {
+    }
+
+    void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
+};
+
+
+}
diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py
index 1ca17e923e4..6e4c1c1d417 100644
--- a/tests/integration/helpers/keeper_utils.py
+++ b/tests/integration/helpers/keeper_utils.py
@@ -278,6 +278,11 @@ def get_leader(cluster, nodes):
             return node
     raise Exception("No leader in Keeper cluster.")
 
+def get_follower(cluster, nodes):
+    for node in nodes:
+        if is_follower(cluster, node):
+            return node
+    raise Exception("No followers in Keeper cluster.")
 
 def get_fake_zk(cluster, node, timeout: float = 30.0) -> KazooClient:
     _fake = KazooClient(
diff --git a/tests/integration/test_keeper_http_control/__init__.py b/tests/integration/test_keeper_http_control/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_keeper_http_control/configs/enable_keeper1.xml b/tests/integration/test_keeper_http_control/configs/enable_keeper1.xml
new file mode 100644
index 00000000000..20e3c307f31
--- /dev/null
+++ b/tests/integration/test_keeper_http_control/configs/enable_keeper1.xml
@@ -0,0 +1,37 @@
+<clickhouse>
+    <keeper_server>
+        <tcp_port>9181</tcp_port>
+        <server_id>1</server_id>
+        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
+        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
+
+        <coordination_settings>
+            <operation_timeout_ms>5000</operation_timeout_ms>
+            <session_timeout_ms>10000</session_timeout_ms>
+            <raft_logs_level>trace</raft_logs_level>
+        </coordination_settings>
+
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>node1</hostname>
+                <port>9234</port>
+            </server>
+            <server>
+                <id>2</id>
+                <hostname>node2</hostname>
+                <port>9234</port>
+                <start_as_follower>true</start_as_follower>
+            </server>
+            <server>
+                <id>3</id>
+                <hostname>node3</hostname>
+                <port>9234</port>
+                <start_as_follower>true</start_as_follower>
+            </server>
+        </raft_configuration>
+        <http_control>
+            <port>9182</port>
+        </http_control>
+    </keeper_server>
+</clickhouse>
diff --git a/tests/integration/test_keeper_http_control/configs/enable_keeper2.xml b/tests/integration/test_keeper_http_control/configs/enable_keeper2.xml
new file mode 100644
index 00000000000..b9002eb2436
--- /dev/null
+++ b/tests/integration/test_keeper_http_control/configs/enable_keeper2.xml
@@ -0,0 +1,37 @@
+<clickhouse>
+    <keeper_server>
+        <tcp_port>9181</tcp_port>
+        <server_id>2</server_id>
+        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
+        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
+
+        <coordination_settings>
+            <operation_timeout_ms>5000</operation_timeout_ms>
+            <session_timeout_ms>10000</session_timeout_ms>
+            <raft_logs_level>trace</raft_logs_level>
+        </coordination_settings>
+
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>node1</hostname>
+                <port>9234</port>
+            </server>
+            <server>
+                <id>2</id>
+                <hostname>node2</hostname>
+                <port>9234</port>
+                <start_as_follower>true</start_as_follower>
+            </server>
+            <server>
+                <id>3</id>
+                <hostname>node3</hostname>
+                <port>9234</port>
+                <start_as_follower>true</start_as_follower>
+            </server>
+        </raft_configuration>
+        <http_control>
+            <port>9182</port>
+        </http_control>
+    </keeper_server>
+</clickhouse>
diff --git a/tests/integration/test_keeper_http_control/configs/enable_keeper3.xml b/tests/integration/test_keeper_http_control/configs/enable_keeper3.xml
new file mode 100644
index 00000000000..6e4e17399f7
--- /dev/null
+++ b/tests/integration/test_keeper_http_control/configs/enable_keeper3.xml
@@ -0,0 +1,37 @@
+<clickhouse>
+    <keeper_server>
+        <tcp_port>9181</tcp_port>
+        <server_id>3</server_id>
+        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
+        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
+
+        <coordination_settings>
+            <operation_timeout_ms>5000</operation_timeout_ms>
+            <session_timeout_ms>10000</session_timeout_ms>
+            <raft_logs_level>trace</raft_logs_level>
+        </coordination_settings>
+
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>node1</hostname>
+                <port>9234</port>
+            </server>
+            <server>
+                <id>2</id>
+                <hostname>node2</hostname>
+                <port>9234</port>
+                <start_as_follower>true</start_as_follower>
+            </server>
+            <server>
+                <id>3</id>
+                <hostname>node3</hostname>
+                <port>9234</port>
+                <start_as_follower>true</start_as_follower>
+            </server>
+        </raft_configuration>
+        <http_control>
+            <port>9182</port>
+        </http_control>
+    </keeper_server>
+</clickhouse>
diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py
new file mode 100644
index 00000000000..04d84671de2
--- /dev/null
+++ b/tests/integration/test_keeper_http_control/test.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+import os
+import pytest
+import requests
+
+import helpers.keeper_utils as keeper_utils
+from kazoo.client import KazooClient
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs")
+
+node1 = cluster.add_instance(
+    "node1", main_configs=["configs/enable_keeper1.xml"], stay_alive=True
+)
+node2 = cluster.add_instance(
+    "node2", main_configs=["configs/enable_keeper2.xml"], stay_alive=True
+)
+node3 = cluster.add_instance(
+    "node3", main_configs=["configs/enable_keeper3.xml"], stay_alive=True
+)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def get_fake_zk(node, timeout=30.0):
+    _fake_zk_instance = KazooClient(
+        hosts=cluster.get_instance_ip(node.name) + ":9181", timeout=timeout
+    )
+    _fake_zk_instance.start()
+    return _fake_zk_instance
+
+def test_http_readiness(started_cluster):
+    leader = keeper_utils.get_leader(cluster, [node1, node2, node3])
+    response = requests.get(
+            "http://{host}:{port}/ready".format(host=leader.ip_address, port=9182)
+    )
+    assert(response.status_code == 200)
+
+    readiness_data = response.json()
+    assert(readiness_data["status"] == "ok")
+    assert(readiness_data["details"]["leader"] == True)
+    assert(readiness_data["details"]["follower"] == False)
+
+    follower = keeper_utils.get_follower(cluster, [node1, node2, node3])
+    response = requests.get(
+            "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182)
+    )
+    assert(response.status_code == 200)
+
+    readiness_data = response.json()
+    assert(readiness_data["status"] == "ok")
+    assert(readiness_data["details"]["leader"] == False)
+    assert(readiness_data["details"]["follower"] == True)

From 3adb83e406027edc92a9792aa0f5587e33451765 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Thu, 7 Dec 2023 06:49:10 +0000
Subject: [PATCH 110/213] Automatic style fix

---
 tests/integration/helpers/keeper_utils.py     |  2 ++
 .../test_keeper_http_control/test.py          | 21 ++++++++++---------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py
index 6e4c1c1d417..e07bce901d2 100644
--- a/tests/integration/helpers/keeper_utils.py
+++ b/tests/integration/helpers/keeper_utils.py
@@ -278,12 +278,14 @@ def get_leader(cluster, nodes):
             return node
     raise Exception("No leader in Keeper cluster.")
 
+
 def get_follower(cluster, nodes):
     for node in nodes:
         if is_follower(cluster, node):
             return node
     raise Exception("No followers in Keeper cluster.")
 
+
 def get_fake_zk(cluster, node, timeout: float = 30.0) -> KazooClient:
     _fake = KazooClient(
         hosts=cluster.get_instance_ip(node.name) + ":9181", timeout=timeout
diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py
index 04d84671de2..b415a03a5c4 100644
--- a/tests/integration/test_keeper_http_control/test.py
+++ b/tests/integration/test_keeper_http_control/test.py
@@ -38,25 +38,26 @@ def get_fake_zk(node, timeout=30.0):
     _fake_zk_instance.start()
     return _fake_zk_instance
 
+
 def test_http_readiness(started_cluster):
     leader = keeper_utils.get_leader(cluster, [node1, node2, node3])
     response = requests.get(
-            "http://{host}:{port}/ready".format(host=leader.ip_address, port=9182)
+        "http://{host}:{port}/ready".format(host=leader.ip_address, port=9182)
     )
-    assert(response.status_code == 200)
+    assert response.status_code == 200
 
     readiness_data = response.json()
-    assert(readiness_data["status"] == "ok")
-    assert(readiness_data["details"]["leader"] == True)
-    assert(readiness_data["details"]["follower"] == False)
+    assert readiness_data["status"] == "ok"
+    assert readiness_data["details"]["leader"] == True
+    assert readiness_data["details"]["follower"] == False
 
     follower = keeper_utils.get_follower(cluster, [node1, node2, node3])
     response = requests.get(
-            "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182)
+        "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182)
     )
-    assert(response.status_code == 200)
+    assert response.status_code == 200
 
     readiness_data = response.json()
-    assert(readiness_data["status"] == "ok")
-    assert(readiness_data["details"]["leader"] == False)
-    assert(readiness_data["details"]["follower"] == True)
+    assert readiness_data["status"] == "ok"
+    assert readiness_data["details"]["leader"] == False
+    assert readiness_data["details"]["follower"] == True

From 771d5c58ea49271b96e70fbfe5c992362eae6c13 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin@clickhouse.com>
Date: Fri, 8 Dec 2023 05:28:01 +0100
Subject: [PATCH 111/213] Fix Keeper standalone build

---
 programs/keeper/CMakeLists.txt        |  1 +
 programs/keeper/Keeper.cpp            |  3 ++-
 programs/server/Server.cpp            |  2 +-
 src/Server/HTTPHandlerFactory.h       |  6 ------
 src/Server/KeeperReadinessHandler.cpp | 17 ++++-------------
 src/Server/KeeperReadinessHandler.h   | 15 +++++++++------
 6 files changed, 17 insertions(+), 27 deletions(-)

diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt
index cdb1d89b18e..f3d82b6029b 100644
--- a/programs/keeper/CMakeLists.txt
+++ b/programs/keeper/CMakeLists.txt
@@ -68,6 +68,7 @@ if (BUILD_STANDALONE_KEEPER)
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperReadinessHandler.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp
diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index 4f45b09b682..7585f147161 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -32,6 +32,7 @@
 #include <Server/HTTP/HTTPServer.h>
 #include <Server/TCPServer.h>
 #include <Server/HTTPHandlerFactory.h>
+#include <Server/KeeperReadinessHandler.h>
 
 #include "Core/Defines.h"
 #include "config.h"
@@ -486,7 +487,7 @@ try
                 port_name,
                 "HTTP Control: http://" + address.toString(),
                 std::make_unique<HTTPServer>(
-                    std::move(my_http_context), createKeeperHTTPControlMainHandlerFactory(*this, config_getter(), global_context->getKeeperDispatcher(), "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params)
+                    std::move(my_http_context), createKeeperHTTPControlMainHandlerFactory(config_getter(), global_context->getKeeperDispatcher(), "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params)
                     );
         });
     }
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index ee3c7a72edc..5abaf67e3d8 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -92,6 +92,7 @@
 #include <Server/ProxyV1HandlerFactory.h>
 #include <Server/TLSHandlerFactory.h>
 #include <Server/ProtocolServerAdapter.h>
+#include <Server/KeeperReadinessHandler.h>
 #include <Server/HTTP/HTTPServer.h>
 #include <Interpreters/AsynchronousInsertQueue.h>
 #include <Core/ServerSettings.h>
@@ -1575,7 +1576,6 @@ try
                     std::make_unique<HTTPServer>(
                         std::move(http_context),
                         createKeeperHTTPControlMainHandlerFactory(
-                            *this,
                             config_getter(),
                             global_context->getKeeperDispatcher(),
                             "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params));
diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h
index fd927d480fd..459d54e27a5 100644
--- a/src/Server/HTTPHandlerFactory.h
+++ b/src/Server/HTTPHandlerFactory.h
@@ -137,12 +137,6 @@ createPrometheusMainHandlerFactory(IServer & server,
     AsynchronousMetrics & async_metrics,
     const std::string & name);
 
-HTTPRequestHandlerFactoryPtr
-createKeeperHTTPControlMainHandlerFactory(IServer & server,
-    const Poco::Util::AbstractConfiguration & config,
-    std::shared_ptr<KeeperDispatcher> keeper_dispatcher,
-    const std::string & name);
-
 /// @param server - used in handlers to check IServer::isCancelled()
 /// @param config - not the same as server.config(), since it can be newer
 /// @param async_metrics - used for prometheus (in case of prometheus.asynchronous_metrics=true)
diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp
index f69f6cb377a..9ba4fe44ab4 100644
--- a/src/Server/KeeperReadinessHandler.cpp
+++ b/src/Server/KeeperReadinessHandler.cpp
@@ -1,18 +1,11 @@
 #include <memory>
 
-#include <Server/KeeperReadinessHandler.h>
-#include <Databases/IDatabase.h>
 #include <IO/HTTPCommon.h>
-#include <Interpreters/Context.h>
-#include <Server/HTTP/HTMLForm.h>
+#include <Coordination/KeeperDispatcher.h>
+#include <Server/KeeperReadinessHandler.h>
 #include <Server/HTTPHandlerFactory.h>
 #include <Server/HTTPHandlerRequestFilter.h>
-#include <Server/IServer.h>
-#include <Storages/StorageReplicatedMergeTree.h>
-#include <Common/typeid_cast.h>
-#include <Coordination/KeeperDispatcher.h>
 #include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
-
 #include <Poco/Net/HTTPRequestHandlerFactory.h>
 #include <Poco/Net/HTTPServerRequest.h>
 #include <Poco/Net/HTTPServerResponse.h>
@@ -69,18 +62,16 @@ void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTP
     }
 }
 
-
 HTTPRequestHandlerFactoryPtr createKeeperHTTPControlMainHandlerFactory(
-    IServer & server,
     const Poco::Util::AbstractConfiguration & config,
     std::shared_ptr<KeeperDispatcher> keeper_dispatcher,
     const std::string & name)
 {
     auto factory = std::make_shared<HTTPRequestHandlerFactoryMain>(name);
     using Factory = HandlingRuleHTTPHandlerFactory<KeeperReadinessHandler>;
-    Factory::Creator creator = [&server, keeper_dispatcher]() -> std::unique_ptr<KeeperReadinessHandler>
+    Factory::Creator creator = [keeper_dispatcher]() -> std::unique_ptr<KeeperReadinessHandler>
     {
-        return std::make_unique<KeeperReadinessHandler>(server, keeper_dispatcher);
+        return std::make_unique<KeeperReadinessHandler>(keeper_dispatcher);
     };
 
     auto readiness_handler = std::make_shared<Factory>(std::move(creator));
diff --git a/src/Server/KeeperReadinessHandler.h b/src/Server/KeeperReadinessHandler.h
index 143751c5d67..03fd58e831f 100644
--- a/src/Server/KeeperReadinessHandler.h
+++ b/src/Server/KeeperReadinessHandler.h
@@ -1,8 +1,8 @@
 #pragma once
 
-#include <memory>
+#include <Server/IServer.h>
 #include <Server/HTTP/HTTPRequestHandler.h>
-#include <Server/HTTP/HTMLForm.h>
+#include <Server/HTTP/HTTPRequestHandlerFactory.h>
 #include <Coordination/KeeperDispatcher.h>
 
 namespace DB
@@ -14,18 +14,21 @@ class IServer;
 class KeeperReadinessHandler : public HTTPRequestHandler, WithContext
 {
 private:
-    IServer & server;
     std::shared_ptr<KeeperDispatcher> keeper_dispatcher;
 
 public:
-    explicit KeeperReadinessHandler(IServer & server_, std::shared_ptr<KeeperDispatcher> keeper_dispatcher_)
-        : server(server_)
-        , keeper_dispatcher(keeper_dispatcher_)
+    explicit KeeperReadinessHandler(std::shared_ptr<KeeperDispatcher> keeper_dispatcher_)
+        : keeper_dispatcher(keeper_dispatcher_)
     {
     }
 
     void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
 };
 
+HTTPRequestHandlerFactoryPtr
+createKeeperHTTPControlMainHandlerFactory(
+    const Poco::Util::AbstractConfiguration & config,
+    std::shared_ptr<KeeperDispatcher> keeper_dispatcher,
+    const std::string & name);
 
 }

From 76966818bade118a0e003469314fbb196f806c66 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin@clickhouse.com>
Date: Fri, 8 Dec 2023 13:57:22 +0100
Subject: [PATCH 112/213] Fix FreeBSD build

---
 src/Server/KeeperReadinessHandler.cpp |  7 ++++++-
 src/Server/KeeperReadinessHandler.h   | 10 ++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp
index 9ba4fe44ab4..37afd8e9898 100644
--- a/src/Server/KeeperReadinessHandler.cpp
+++ b/src/Server/KeeperReadinessHandler.cpp
@@ -1,8 +1,11 @@
+#include <Server/KeeperReadinessHandler.h>
+
+#if USE_NURAFT
+
 #include <memory>
 
 #include <IO/HTTPCommon.h>
 #include <Coordination/KeeperDispatcher.h>
-#include <Server/KeeperReadinessHandler.h>
 #include <Server/HTTPHandlerFactory.h>
 #include <Server/HTTPHandlerRequestFilter.h>
 #include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
@@ -84,3 +87,5 @@ HTTPRequestHandlerFactoryPtr createKeeperHTTPControlMainHandlerFactory(
 }
 
 }
+
+#endif
diff --git a/src/Server/KeeperReadinessHandler.h b/src/Server/KeeperReadinessHandler.h
index 03fd58e831f..caa59098427 100644
--- a/src/Server/KeeperReadinessHandler.h
+++ b/src/Server/KeeperReadinessHandler.h
@@ -1,6 +1,9 @@
 #pragma once
 
-#include <Server/IServer.h>
+#include "config.h"
+
+#if USE_NURAFT
+
 #include <Server/HTTP/HTTPRequestHandler.h>
 #include <Server/HTTP/HTTPRequestHandlerFactory.h>
 #include <Coordination/KeeperDispatcher.h>
@@ -8,9 +11,6 @@
 namespace DB
 {
 
-class Context;
-class IServer;
-
 class KeeperReadinessHandler : public HTTPRequestHandler, WithContext
 {
 private:
@@ -32,3 +32,5 @@ createKeeperHTTPControlMainHandlerFactory(
     const std::string & name);
 
 }
+
+#endif

From 7172a8ec9a87b43097db50f64914f2991329856c Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <thevar1able@users.noreply.github.com>
Date: Fri, 8 Dec 2023 13:22:46 +0100
Subject: [PATCH 113/213] Remove redundant include

---
 src/Server/HTTPHandlerFactory.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h
index 459d54e27a5..94b02e52277 100644
--- a/src/Server/HTTPHandlerFactory.h
+++ b/src/Server/HTTPHandlerFactory.h
@@ -5,7 +5,6 @@
 #include <Server/HTTP/HTTPRequestHandlerFactory.h>
 #include <Server/HTTPHandlerRequestFilter.h>
 #include <Server/HTTPRequestHandlerFactoryMain.h>
-#include <Coordination/KeeperDispatcher.h>
 #include <Common/StringUtils/StringUtils.h>
 
 #include <Poco/Util/AbstractConfiguration.h>

From 81250d1a901040e18c8ad96486a27dd9f78bc9dc Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Sun, 10 Dec 2023 16:05:29 +0300
Subject: [PATCH 114/213] SerializationString improve performance

---
 src/DataTypes/Serializations/SerializationString.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp
index 89fa09781ef..c59c642cf04 100644
--- a/src/DataTypes/Serializations/SerializationString.cpp
+++ b/src/DataTypes/Serializations/SerializationString.cpp
@@ -152,6 +152,8 @@ template <int UNROLL_TIMES>
 static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnString::Offsets & offsets, ReadBuffer & istr, size_t limit)
 {
     size_t offset = data.size();
+    data.resize(std::max(data.capacity(), static_cast<size_t>(4096)));
+
     for (size_t i = 0; i < limit; ++i)
     {
         if (istr.eof())
@@ -171,7 +173,8 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
         offset += size + 1;
         offsets.push_back(offset);
 
-        data.resize(offset);
+        if (unlikely(offset > data.size()))
+            data.resize(std::max(offset, data.size() * 2)); /// One doubling may be too small for a long value.
 
         if (size)
         {
@@ -203,6 +206,8 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
 
         data[offset - 1] = 0;
     }
+
+    data.resize(offset);
 }
 
 

From 3652419873e3c940170c95970c5a973e6201b05d Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Mon, 11 Dec 2023 11:01:13 +0100
Subject: [PATCH 115/213] Update run.sh

---
 docker/test/stress/run.sh                        | 16 ++++++++++------
 src/Interpreters/Cache/SLRUFileCachePriority.cpp |  1 -
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 5e12ade24d5..67056cc1bc1 100644
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -69,12 +69,16 @@ clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
 stop
 mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
 
-# Randomly choose between LRU and SLRU policies.
-cache_policy="SLRU"
-#TODO: uncomment this before merge, for testing purposes it is SLRU only before merge.
-#if [$(($RANDOM%2)) -eq 1]; then
-#    cache_policy="LRU"
-#fi
+# Randomize cache policies.
+cache_policy=""
+if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then
+    cache_policy="SLRU"
+else
+    cache_policy="LRU"
+fi
+
+echo "Using cache policy: $cache_policy"
+
 if [ "$cache_policy" = "SLRU" ]; then
     sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
     | sed "s|<cache_policy>LRU</cache_policy>|<cache_policy>SLRU</cache_policy>|" \
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
index 7b3e666f595..71b8d44d438 100644
--- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp
@@ -150,7 +150,6 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach
 
     /// Entry is in probationary queue.
     /// We need to move it to protected queue.
-
     const size_t size = iterator.getEntry().size;
     if (size > protected_queue.getSizeLimit())
     {

From c78649bdcffd9ac1b1cf547f1b8d29e4b4deea38 Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Mon, 11 Dec 2023 10:36:26 +0000
Subject: [PATCH 116/213] Support negative positional arguments

---
 src/Analyzer/Passes/QueryAnalysisPass.cpp     | 10 +-
 .../replaceForPositionalArguments.cpp         | 10 +-
 .../02006_test_positional_arguments.reference | 94 +++++++++++++++++++
 .../02006_test_positional_arguments.sql       | 21 +++++
 4 files changed, 127 insertions(+), 8 deletions(-)

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index 1e63d5ca8e4..a52a0fac232 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -2153,18 +2153,20 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_
             node_to_replace = &sort_node->getExpression();
 
         auto * constant_node = (*node_to_replace)->as<ConstantNode>();
-        if (!constant_node || constant_node->getValue().getType() != Field::Types::UInt64)
+        if (!constant_node
+            || (constant_node->getValue().getType() != Field::Types::UInt64 && constant_node->getValue().getType() != Field::Types::Int64))
             continue;
 
-        UInt64 positional_argument_number = constant_node->getValue().get<UInt64>();
-        if (positional_argument_number == 0 || positional_argument_number > projection_nodes.size())
+        auto positional_argument_number = constant_node->getValue().get<Int64>();
+        if (positional_argument_number == 0 || static_cast<size_t>(std::abs(positional_argument_number)) > projection_nodes.size())
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
                 "Positional argument number {} is out of bounds. Expected in range [1, {}]. In scope {}",
                 positional_argument_number,
                 projection_nodes.size(),
                 scope.scope_node->formatASTForErrorMessage());
 
-        --positional_argument_number;
+        positional_argument_number
+            = (positional_argument_number > 0) ? --positional_argument_number : projection_nodes.size() + positional_argument_number;
         *node_to_replace = projection_nodes[positional_argument_number];
     }
 }
diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp
index 241dd7cf92c..f5a77dacd4c 100644
--- a/src/Interpreters/replaceForPositionalArguments.cpp
+++ b/src/Interpreters/replaceForPositionalArguments.cpp
@@ -27,16 +27,18 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel
         return false;
 
     auto which = ast_literal->value.getType();
-    if (which != Field::Types::UInt64)
+    if (which != Field::Types::UInt64 && which != Field::Types::Int64)
         return false;
 
-    auto pos = ast_literal->value.get<UInt64>();
-    if (!pos || pos > columns.size())
+    auto pos = ast_literal->value.get<Int64>();
+    if (!pos || static_cast<size_t>(std::abs(pos)) > columns.size())
         throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                 "Positional argument out of bounds: {} (expected in range [1, {}]",
                         pos, columns.size());
 
-    const auto & column = columns[--pos];
+    pos = (pos > 0) ? --pos : columns.size() + pos;
+
+    const auto & column = columns[pos];
     if (typeid_cast<const ASTIdentifier *>(column.get()) || typeid_cast<const ASTLiteral *>(column.get()))
     {
         argument = column->clone();
diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.reference b/tests/queries/0_stateless/02006_test_positional_arguments.reference
index 40100e8d5be..079bd071103 100644
--- a/tests/queries/0_stateless/02006_test_positional_arguments.reference
+++ b/tests/queries/0_stateless/02006_test_positional_arguments.reference
@@ -3,18 +3,50 @@ select x3, x2, x1 from test order by 1;
 1	100	100
 10	1	10
 100	10	1
+select x3, x2, x1 from test order by -3;
+1	100	100
+10	1	10
+100	10	1
 select x3, x2, x1 from test order by x3;
 1	100	100
 10	1	10
 100	10	1
+select x3, x2, x1 from test order by 3;
+100	10	1
+10	1	10
+1	100	100
+select x3, x2, x1 from test order by -1;
+100	10	1
+10	1	10
+1	100	100
+select x3, x2, x1 from test order by x1;
+100	10	1
+10	1	10
+1	100	100
 select x3, x2, x1 from test order by 1 desc;
 100	10	1
 10	1	10
 1	100	100
+select x3, x2, x1 from test order by -3 desc;
+100	10	1
+10	1	10
+1	100	100
 select x3, x2, x1 from test order by x3 desc;
 100	10	1
 10	1	10
 1	100	100
+select x3, x2, x1 from test order by 3 desc;
+1	100	100
+10	1	10
+100	10	1
+select x3, x2, x1 from test order by -1 desc;
+1	100	100
+10	1	10
+100	10	1
+select x3, x2, x1 from test order by x1 desc;
+1	100	100
+10	1	10
+100	10	1
 insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1);
 select x3, x2 from test group by x3, x2 order by x3;
 1	100
@@ -54,6 +86,20 @@ SELECT
     x1
 FROM test
 ORDER BY x3 + 1 ASC
+explain syntax select x3, x2, x1 from test order by -1;
+SELECT
+    x3,
+    x2,
+    x1
+FROM test
+ORDER BY x1 ASC
+explain syntax select x3 + 1, x2, x1 from test order by -1;
+SELECT
+    x3 + 1,
+    x2,
+    x1
+FROM test
+ORDER BY x1 ASC
 explain syntax select x3, x3 - x2, x2, x1 from test order by 2;
 SELECT
     x3,
@@ -62,6 +108,14 @@ SELECT
     x1
 FROM test
 ORDER BY x3 - x2 ASC
+explain syntax select x3, x3 - x2, x2, x1 from test order by -2;
+SELECT
+    x3,
+    x3 - x2,
+    x2,
+    x1
+FROM test
+ORDER BY x2 ASC
 explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by 2;
 SELECT
     x3,
@@ -69,12 +123,28 @@ SELECT
     x1 + x2
 FROM test
 ORDER BY if(x3 > 10, x3, x1 + x2) ASC
+explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by -2;
+SELECT
+    x3,
+    if(x3 > 10, x3, x1 + x2),
+    x1 + x2
+FROM test
+ORDER BY if(x3 > 10, x3, x1 + x2) ASC
 explain syntax select max(x1), x2 from test group by 2 order by 1, 2;
 SELECT
     max(x1),
     x2
 FROM test
 GROUP BY x2
+ORDER BY
+    max(x1) ASC,
+    x2 ASC
+explain syntax select max(x1), x2 from test group by -1 order by -2, -1;
+SELECT
+    max(x1),
+    x2
+FROM test
+GROUP BY x2
 ORDER BY
     max(x1) ASC,
     x2 ASC
@@ -83,16 +153,34 @@ SELECT
     1 + greatest(x1, 1),
     x2
 FROM test
+GROUP BY
+    1 + greatest(x1, 1),
+    x2
+explain syntax select 1 + greatest(x1, 1), x2 from test group by -2, -1;
+SELECT
+    1 + greatest(x1, 1),
+    x2
+FROM test
 GROUP BY
     1 + greatest(x1, 1),
     x2
 select max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 }
 select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 }
+select max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 }
+select 1 + max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 }
 explain syntax select x1 + x3, x3 from test group by 1, 2;
 SELECT
     x1 + x3,
     x3
 FROM test
+GROUP BY
+    x1 + x3,
+    x3
+explain syntax select x1 + x3, x3 from test group by -2, -1;
+SELECT
+    x1 + x3,
+    x3
+FROM test
 GROUP BY
     x1 + x3,
     x3
@@ -102,8 +190,14 @@ select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2,
 1	2	10	100
 10	20	1	10
 100	200	100	1
+select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, -1 desc, -2 asc;
+1	2	10	100
+10	20	1	10
+100	200	100	1
 select a, b, c, d, e, f  from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6 order by a;
 44	88	13	14	15	16
+select a, b, c, d, e, f  from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,-3,-2,-1 order by a;
+44	88	13	14	15	16
 explain syntax select plus(1, 1) as a group by a;
 SELECT 1 + 1 AS a
 GROUP BY a
diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.sql b/tests/queries/0_stateless/02006_test_positional_arguments.sql
index 159ad6bd427..6f427e0298d 100644
--- a/tests/queries/0_stateless/02006_test_positional_arguments.sql
+++ b/tests/queries/0_stateless/02006_test_positional_arguments.sql
@@ -9,11 +9,21 @@ insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1);
 
 -- { echo }
 select x3, x2, x1 from test order by 1;
+select x3, x2, x1 from test order by -3;
 select x3, x2, x1 from test order by x3;
 
+select x3, x2, x1 from test order by 3;
+select x3, x2, x1 from test order by -1;
+select x3, x2, x1 from test order by x1;
+
 select x3, x2, x1 from test order by 1 desc;
+select x3, x2, x1 from test order by -3 desc;
 select x3, x2, x1 from test order by x3 desc;
 
+select x3, x2, x1 from test order by 3 desc;
+select x3, x2, x1 from test order by -1 desc;
+select x3, x2, x1 from test order by x1 desc;
+
 insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1);
 select x3, x2 from test group by x3, x2 order by x3;
 select x3, x2 from test group by 1, 2 order by x3;
@@ -25,21 +35,32 @@ select x1, x2, x3 from test order by 3 limit 1 by 1;
 
 explain syntax select x3, x2, x1 from test order by 1;
 explain syntax select x3 + 1, x2, x1 from test order by 1;
+explain syntax select x3, x2, x1 from test order by -1;
+explain syntax select x3 + 1, x2, x1 from test order by -1;
 explain syntax select x3, x3 - x2, x2, x1 from test order by 2;
+explain syntax select x3, x3 - x2, x2, x1 from test order by -2;
 explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by 2;
+explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by -2;
 explain syntax select max(x1), x2 from test group by 2 order by 1, 2;
+explain syntax select max(x1), x2 from test group by -1 order by -2, -1;
 explain syntax select 1 + greatest(x1, 1), x2 from test group by 1, 2;
+explain syntax select 1 + greatest(x1, 1), x2 from test group by -2, -1;
 
 select max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 }
 select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 }
+select max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 }
+select 1 + max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 }
 
 explain syntax select x1 + x3, x3 from test group by 1, 2;
+explain syntax select x1 + x3, x3 from test group by -2, -1;
 
 create table test2(x1 Int, x2 Int, x3 Int) engine=Memory;
 insert into test2 values (1, 10, 100), (10, 1, 10), (100, 100, 1);
 select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, 4 desc, 3 asc;
+select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, -1 desc, -2 asc;
 
 select a, b, c, d, e, f  from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6 order by a;
+select a, b, c, d, e, f  from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,-3,-2,-1 order by a;
 
 explain syntax select plus(1, 1) as a group by a;
 select substr('aaaaaaaaaaaaaa', 8) as a  group by a order by a;

From cdfe99c380861caa2b09ccd937c2964153758518 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@clickhouse.com>
Date: Mon, 11 Dec 2023 11:34:54 +0000
Subject: [PATCH 117/213] Remove unused template parameter from
 fillColumnsFromMap

---
 src/Interpreters/HashJoin.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp
index 71e9d4bba80..c79db392eb4 100644
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@@ -1954,9 +1954,9 @@ public:
         }
         else
         {
-            auto fill_callback = [&](auto, auto strictness, auto & map)
+            auto fill_callback = [&](auto, auto, auto & map)
             {
-                rows_added = fillColumnsFromMap<strictness>(map, columns_right);
+                rows_added = fillColumnsFromMap(map, columns_right);
             };
 
             if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback))
@@ -2017,14 +2017,14 @@ private:
         return rows_added;
     }
 
-    template <JoinStrictness STRICTNESS, typename Maps>
+    template <typename Maps>
     size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
     {
         switch (parent.data->type)
         {
         #define M(TYPE) \
             case HashJoin::Type::TYPE: \
-                return fillColumns<STRICTNESS>(*maps.TYPE, columns_keys_and_right);
+                return fillColumns(*maps.TYPE, columns_keys_and_right);
             APPLY_FOR_JOIN_VARIANTS(M)
         #undef M
             default:
@@ -2034,7 +2034,7 @@ private:
         UNREACHABLE();
     }
 
-    template <JoinStrictness STRICTNESS, typename Map>
+    template <typename Map>
     size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right)
     {
         size_t rows_added = 0;

From 966a09fb70465a161353ac005600608428da7847 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Mon, 11 Dec 2023 12:42:23 +0100
Subject: [PATCH 118/213] Review fixes

---
 src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp | 2 +-
 src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp   | 2 --
 src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp  | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
index d369b8f3788..bb7e108a34e 100644
--- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
+++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
@@ -549,7 +549,7 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
                 return;
             }
 
-            auto log_table_structure_changed = [&](std::string_view reason)
+            auto log_table_structure_changed = [&](const std::string & reason)
             {
                 LOG_INFO(log, "Table structure of the table {} changed ({}), "
                          "will mark it as skipped from replication. "
diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
index 35daced0fa9..43de2069b19 100644
--- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
+++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
@@ -447,8 +447,6 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection
     assertBlocksHaveEqualStructure(input->getPort().getHeader(), block_io.pipeline.getHeader(), "postgresql replica load from snapshot");
     block_io.pipeline.complete(Pipe(std::move(input)));
 
-    /// TODO: make a test when we fail in the middle of inserting data from source.
-
     CompletedPipelineExecutor executor(block_io.pipeline);
     executor.execute();
 
diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
index 9eb35d14506..21bb5d9316f 100644
--- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
+++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
@@ -198,7 +198,7 @@ void StorageMaterializedPostgreSQL::createNestedIfNeeded(PostgreSQLTableStructur
         auto table_id = getStorageID();
         auto tmp_nested_table_id = StorageID(table_id.database_name, getNestedTableName());
         LOG_DEBUG(log, "Creating clickhouse table for postgresql table {} (ast: {})",
-                  table_id.getNameForLogs(), serializeAST(*ast_create));
+                  table_id.getNameForLogs(), ast_create->formatForLogging());
 
         InterpreterCreateQuery interpreter(ast_create, nested_context);
         interpreter.execute();

From a62a0b92de558ac2376eecf54f883353b6a1afb9 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 11 Dec 2023 12:46:22 +0100
Subject: [PATCH 119/213] Increase async block cache deduplication timeout

---
 src/Storages/MergeTree/AsyncBlockIDsCache.cpp | 36 +++++++++----------
 src/Storages/MergeTree/AsyncBlockIDsCache.h   |  1 +
 src/Storages/MergeTree/MergeTreeSettings.h    |  3 +-
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp
index a9cdd09e061..4f3a8f16366 100644
--- a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp
+++ b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp
@@ -79,11 +79,12 @@ catch (...)
 
 template <typename TStorage>
 AsyncBlockIDsCache<TStorage>::AsyncBlockIDsCache(TStorage & storage_)
-    : storage(storage_),
-    update_min_interval(storage.getSettings()->async_block_ids_cache_min_update_interval_ms),
-    path(storage.getZooKeeperPath() + "/async_blocks"),
-    log_name(storage.getStorageID().getFullTableName() + " (AsyncBlockIDsCache)"),
-    log(&Poco::Logger::get(log_name))
+    : storage(storage_)
+    , update_min_interval(storage.getSettings()->async_block_ids_cache_min_update_interval_ms)
+    , update_wait(storage.getSettings()->async_block_ids_cache_update_wait_ms)
+    , path(storage.getZooKeeperPath() + "/async_blocks")
+    , log_name(storage.getStorageID().getFullTableName() + " (AsyncBlockIDsCache)")
+    , log(&Poco::Logger::get(log_name))
 {
     task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ update(); });
 }
@@ -102,21 +103,20 @@ Strings AsyncBlockIDsCache<TStorage>::detectConflicts(const Strings & paths, UIn
     if (!storage.getSettings()->use_async_block_ids_cache)
         return {};
 
-    std::unique_lock lk(mu);
-    /// For first time access of this cache, the `last_version` is zero, so it will not block here.
-    /// For retrying request, We compare the request version and cache version, because zk only returns
-    /// incomplete information of duplication, we need to update the cache to find out more duplication.
-    /// The timeout here is to prevent deadlock, just in case.
-    cv.wait_for(lk, update_min_interval * 2, [&]{return version != last_version;});
-
-    if (version == last_version)
-        LOG_INFO(log, "Read cache with a old version {}", last_version);
-
     CachePtr cur_cache;
-    cur_cache = cache_ptr;
-    last_version = version;
+    {
+        std::unique_lock lk(mu);
+        /// For first time access of this cache, the `last_version` is zero, so it will not block here.
+        /// For retrying request, We compare the request version and cache version, because zk only returns
+        /// incomplete information of duplication, we need to update the cache to find out more duplication.
+        cv.wait_for(lk, update_wait, [&]{return version != last_version;});
 
-    lk.unlock();
+        if (version == last_version)
+            LOG_INFO(log, "Read cache with a old version {}", last_version);
+
+        cur_cache = cache_ptr;
+        last_version = version;
+    }
 
     if (cur_cache == nullptr)
         return {};
diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.h b/src/Storages/MergeTree/AsyncBlockIDsCache.h
index fbd97fd00ff..8ce65ec4927 100644
--- a/src/Storages/MergeTree/AsyncBlockIDsCache.h
+++ b/src/Storages/MergeTree/AsyncBlockIDsCache.h
@@ -33,6 +33,7 @@ private:
 
     std::atomic<std::chrono::steady_clock::time_point> last_updatetime;
     const std::chrono::milliseconds update_min_interval;
+    const std::chrono::milliseconds update_wait;
 
     std::mutex mu;
     CachePtr cache_ptr;
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 5bb712ea786..c71951503c8 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -95,7 +95,8 @@ struct Settings;
     M(UInt64, replicated_deduplication_window_seconds, 7 * 24 * 60 * 60 /* one week */, "Similar to \"replicated_deduplication_window\", but determines old blocks by their lifetime. Hash of an inserted block will be deleted (and the block will not be deduplicated after) if it outside of one \"window\". You can set very big replicated_deduplication_window to avoid duplicating INSERTs during that period of time.", 0) \
     M(UInt64, replicated_deduplication_window_for_async_inserts, 10000, "How many last hash values of async_insert blocks should be kept in ZooKeeper (old blocks will be deleted).", 0) \
     M(UInt64, replicated_deduplication_window_seconds_for_async_inserts, 7 * 24 * 60 * 60 /* one week */, "Similar to \"replicated_deduplication_window_for_async_inserts\", but determines old blocks by their lifetime. Hash of an inserted block will be deleted (and the block will not be deduplicated after) if it outside of one \"window\". You can set very big replicated_deduplication_window to avoid duplicating INSERTs during that period of time.", 0) \
-    M(Milliseconds, async_block_ids_cache_min_update_interval_ms, 100, "Minimum interval between updates of async_block_ids_cache", 0) \
+    M(Milliseconds, async_block_ids_cache_min_update_interval_ms, 1000, "Minimum interval between updates of async_block_ids_cache", 0) \
+    M(Milliseconds, async_block_ids_cache_update_wait_ms, 100, "How long each insert iteration will wait for async_block_ids_cache update", 0) \
     M(Bool, use_async_block_ids_cache, true, "Use in-memory cache to filter duplicated async inserts based on block ids", 0) \
     M(UInt64, max_replicated_logs_to_keep, 1000, "How many records may be in log, if there is inactive replica. Inactive replica becomes lost when when this number exceed.", 0) \
     M(UInt64, min_replicated_logs_to_keep, 10, "Keep about this number of last records in ZooKeeper log, even if they are obsolete. It doesn't affect work of tables: used only to diagnose ZooKeeper log before cleaning.", 0) \

From af45e138ad2f5f17414bd81262c7b319632a40c6 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@clickhouse.com>
Date: Mon, 11 Dec 2023 12:33:11 +0000
Subject: [PATCH 120/213] fix

---
 src/Interpreters/HashJoin.cpp                            | 8 ++++----
 .../02516_join_with_totals_and_subquery_bug.reference    | 9 ++++++---
 .../02516_join_with_totals_and_subquery_bug.sql          | 6 +++---
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp
index c79db392eb4..6e62e595caa 100644
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@@ -269,7 +269,7 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
         sample_block_with_columns_to_add = right_table_keys = materializeBlock(right_sample_block);
     }
 
-    JoinCommon::convertToFullColumnsInplace(right_table_keys);
+    materializeBlockInplace(right_table_keys);
     initRightBlockStructure(data->sample_block);
 
     JoinCommon::createMissedColumns(sample_block_with_columns_to_add);
@@ -2028,7 +2028,7 @@ private:
             APPLY_FOR_JOIN_VARIANTS(M)
         #undef M
             default:
-                throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type)   ;
+                throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type);
         }
 
         UNREACHABLE();
@@ -2080,8 +2080,8 @@ private:
             {
                 const Mapped & mapped = it->getMapped();
 
-                size_t off = map.offsetInternal(it.getPtr());
-                if (parent.isUsed(off))
+                size_t offset = map.offsetInternal(it.getPtr());
+                if (parent.isUsed(offset))
                     continue;
                 AdderNonJoined<Mapped>::add(mapped, rows_added, columns_keys_and_right);
 
diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference
index 51e8394a2f7..83571fd9005 100644
--- a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference
+++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference
@@ -10,19 +10,22 @@
 
 100000000000000000000
 ---
+[]	0	['2']
 ['0']	2	['0']
 ['0']	2	['0']
-['1']	1	['1']
+['1']	1	[]
 
 []	3	[]
 ---
+[]	0	['2']	1
 ['0']	2	['0']	2
-['1']	1	['1']	1
+['1']	1	[]	0
 
 []	3	[]	3
 ---
+[]	['2']	1
 ['0']	['0']	2
 ['0']	['0']	2
-['1']	['1']	1
+['1']	[]	0
 
 []	[]	3
diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql
index 243dceffc43..d39efb0b193 100644
--- a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql
+++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql
@@ -78,7 +78,7 @@ FROM (
     WITH TOTALS
 ) AS l
 FULL JOIN (
-    SELECT ([toString(number % 2)] :: Array(String)) AS item_id FROM numbers(3)
+    SELECT ([toString((number % 2) * 2)] :: Array(String)) AS item_id FROM numbers(3)
 ) AS r
 ON l.item_id = r.item_id
 ORDER BY 1,2,3
@@ -92,7 +92,7 @@ FROM (
     WITH TOTALS
 ) AS l
 FULL JOIN (
-    SELECT ([toString(number % 2)] :: Array(String)) AS item_id, count() FROM numbers(3) GROUP BY item_id
+    SELECT ([toString((number % 2) * 2)] :: Array(String)) AS item_id, count() FROM numbers(3) GROUP BY item_id
     WITH TOTALS
 ) AS r
 ON l.item_id = r.item_id
@@ -106,7 +106,7 @@ FROM (
     SELECT ([toString(number % 2)] :: Array(String)) AS item_id FROM numbers(3)
 ) AS l
 FULL JOIN (
-    SELECT ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, count() FROM numbers(3) GROUP BY item_id
+    SELECT ([toString((number % 2) * 2)] :: Array(LowCardinality(String))) AS item_id, count() FROM numbers(3) GROUP BY item_id
     WITH TOTALS
 ) AS r
 ON l.item_id = r.item_id

From 1e4d61d55a7a83821583b53d842886bcaee371ec Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Mon, 11 Dec 2023 13:44:12 +0100
Subject: [PATCH 121/213] Change error code

---
 src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
index bb7e108a34e..a7ac609a4c5 100644
--- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
+++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
@@ -22,6 +22,7 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
     extern const int POSTGRESQL_REPLICATION_INTERNAL_ERROR;
     extern const int BAD_ARGUMENTS;
+    extern const int ILLEGAL_COLUMN;
 }
 
 namespace
@@ -104,7 +105,7 @@ MaterializedPostgreSQLConsumer::StorageData::Buffer::Buffer(
             columns_.begin(), columns_.end(),
             [](const auto & col) { return col.name == "_sign" || col.name == "_version"; }))
     {
-        throw Exception(ErrorCodes::LOGICAL_ERROR,
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN,
                         "PostgreSQL table cannot contain `_sign` or `_version` columns "
                         "as they are reserved for internal usage");
     }

From 821c7d65271f8319e0867d003a71bdcca6f65bf7 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Mon, 11 Dec 2023 15:48:52 +0300
Subject: [PATCH 122/213] Fixed tests

---
 src/DataTypes/Serializations/SerializationString.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp
index c59c642cf04..308bdce0507 100644
--- a/src/DataTypes/Serializations/SerializationString.cpp
+++ b/src/DataTypes/Serializations/SerializationString.cpp
@@ -174,7 +174,7 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
         offsets.push_back(offset);
 
         if (unlikely(offset > data.size()))
-            data.resize(data.size() * 2);
+            data.resize(roundUpToPowerOfTwoOrZero(std::max(offset, data.size() * 2)));
 
         if (size)
         {

From 4ccf4e11a8ee0e1a7504701f0c77792064bcb5a1 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 11 Dec 2023 13:00:08 +0000
Subject: [PATCH 123/213] Parallel replicas (perf): announcement response
 handling improvement

---
 .../MergeTree/ParallelReplicasReadingCoordinator.cpp  | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
index 9137dc89705..44d10eda21e 100644
--- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
+++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
@@ -161,11 +161,10 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc
     PartRefs parts_diff;
 
     /// To get rid of duplicates
-    for (auto && part: announcement.description)
+    for (auto && part_ranges: announcement.description)
     {
-        auto the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(),
-            [&part] (const Part & other) { return other.description.info.getPartNameV1() == part.info.getPartNameV1(); });
-
+        Part part{.description = std::move(part_ranges), .replicas = {announcement.replica_num}};
+        auto the_same_it = all_parts_to_read.find(part);
         /// We have the same part - add the info about presence on current replica to it
         if (the_same_it != all_parts_to_read.end())
         {
@@ -174,13 +173,13 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc
         }
 
         auto covering_or_the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(),
-            [&part] (const Part & other) { return !other.description.info.isDisjoint(part.info); });
+            [&part] (const Part & other) { return !other.description.info.isDisjoint(part.description.info); });
 
         /// It is covering part or we have covering - skip it
         if (covering_or_the_same_it != all_parts_to_read.end())
             continue;
 
-        auto [insert_it, _] = all_parts_to_read.emplace(Part{.description = std::move(part), .replicas = {announcement.replica_num}});
+        auto [insert_it, _] = all_parts_to_read.emplace(part);
         parts_diff.push_back(insert_it);
     }
 

From fc4e6d70d863cdc6c714ee12d91c3958efd882e3 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Mon, 11 Dec 2023 14:24:05 +0100
Subject: [PATCH 124/213] Update run.sh

---
 docker/test/stateful/run.sh | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh
index 829b3547856..a0def50bfb5 100755
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@@ -24,11 +24,15 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
 
 config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
 
-cache_policy="SLRU"
-#TODO: uncomment this before merge, for testing purposes it is SLRU only before merge.
-#if [$(($RANDOM%2)) -eq 1]; then
-#    cache_policy="LRU"
-#fi
+cache_policy=""
+if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then
+    cache_policy="SLRU"
+else
+    cache_policy="LRU"
+fi
+
+echo "Using cache policy: $cache_policy"
+
 if [ "$cache_policy" = "SLRU" ]; then
     sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
     | sed "s|<cache_policy>LRU</cache_policy>|<cache_policy>SLRU</cache_policy>|" \

From fcaa556bf79e4821e4c1a82b3fc8919c21127158 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Mon, 11 Dec 2023 13:39:10 +0000
Subject: [PATCH 125/213] Fixing test.

---
 .../Transforms/AggregatingTransform.cpp       | 36 ++++++++++---------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp
index ec8ea9396e4..7b1d51bb320 100644
--- a/src/Processors/Transforms/AggregatingTransform.cpp
+++ b/src/Processors/Transforms/AggregatingTransform.cpp
@@ -405,26 +405,28 @@ private:
             }
         }
 
-        if (!shared_data->is_bucket_processed[current_bucket_num])
-            return Status::NeedData;
-
-        if (!two_level_chunks[current_bucket_num])
-            return Status::NeedData;
-
-        auto chunk = std::move(two_level_chunks[current_bucket_num]);
-        const auto has_rows = chunk.hasRows();
-        if (has_rows)
-            output.push(std::move(chunk));
-
-        ++current_bucket_num;
-        if (current_bucket_num == NUM_BUCKETS)
+        while (current_bucket_num < NUM_BUCKETS)
         {
-            output.finish();
-            /// Do not close inputs, they must be finished.
-            return Status::Finished;
+            if (!shared_data->is_bucket_processed[current_bucket_num])
+                return Status::NeedData;
+
+            if (!two_level_chunks[current_bucket_num])
+                return Status::NeedData;
+
+            auto chunk = std::move(two_level_chunks[current_bucket_num]);
+            ++current_bucket_num;
+
+            const auto has_rows = chunk.hasRows();
+            if (has_rows)
+            {
+                output.push(std::move(chunk));
+                return Status::PortFull;
+            }
         }
 
-        return has_rows ? Status::PortFull : Status::NeedData;
+        output.finish();
+        /// Do not close inputs, they must be finished.
+        return Status::Finished;
     }
 
     AggregatingTransformParamsPtr params;

From 6b1acf7e9b7697b28215450cb8a86c2a9284c687 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Mon, 11 Dec 2023 17:06:13 +0300
Subject: [PATCH 126/213] Test

---
 tests/queries/0_stateless/01926_order_by_desc_limit.sql | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/queries/0_stateless/01926_order_by_desc_limit.sql b/tests/queries/0_stateless/01926_order_by_desc_limit.sql
index a0047a2925a..6854e6c1e84 100644
--- a/tests/queries/0_stateless/01926_order_by_desc_limit.sql
+++ b/tests/queries/0_stateless/01926_order_by_desc_limit.sql
@@ -11,11 +11,9 @@ SETTINGS index_granularity = 1024, index_granularity_bytes = '10Mi';
 INSERT INTO order_by_desc SELECT number, repeat('a', 1024) FROM numbers(1024 * 300);
 OPTIMIZE TABLE order_by_desc FINAL;
 
-SELECT s FROM order_by_desc ORDER BY u DESC LIMIT 10 FORMAT Null
-SETTINGS max_memory_usage = '400M';
+SELECT s FROM order_by_desc ORDER BY u DESC LIMIT 10 FORMAT Null;
 
-SELECT s FROM order_by_desc ORDER BY u LIMIT 10 FORMAT Null
-SETTINGS max_memory_usage = '400M';
+SELECT s FROM order_by_desc ORDER BY u LIMIT 10 FORMAT Null;
 
 SYSTEM FLUSH LOGS;
 

From 2c033487323796c8f35b1400817e5b9571d4a0c7 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Mon, 11 Dec 2023 15:20:46 +0100
Subject: [PATCH 127/213] Use checkTimeLimit() to check if query is cancelled.

---
 src/Functions/sleep.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h
index 36fa14cd36e..11b8e48a295 100644
--- a/src/Functions/sleep.h
+++ b/src/Functions/sleep.h
@@ -28,7 +28,6 @@ namespace ErrorCodes
     extern const int TOO_SLOW;
     extern const int ILLEGAL_COLUMN;
     extern const int BAD_ARGUMENTS;
-    extern const int QUERY_WAS_CANCELLED;
 }
 
 /** sleep(seconds) - the specified number of seconds sleeps each columns.
@@ -144,8 +143,8 @@ public:
                     sleepForMicroseconds(sleep_ms);
                     microseconds -= sleep_ms;
 
-                    if (query_status && query_status->isKilled())
-                        throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled");
+                    if (query_status && !query_status->checkTimeLimit())
+                        break;
                 }
 
                 ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count);

From ff1e95c3575c4b3956e2f7eacda3af7a41151891 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Mon, 11 Dec 2023 15:37:19 +0100
Subject: [PATCH 128/213] Fix style.

---
 tests/queries/0_stateless/02932_kill_query_sleep.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/02932_kill_query_sleep.sh b/tests/queries/0_stateless/02932_kill_query_sleep.sh
index 81bb892bc15..836d7f2c686 100755
--- a/tests/queries/0_stateless/02932_kill_query_sleep.sh
+++ b/tests/queries/0_stateless/02932_kill_query_sleep.sh
@@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 function wait_query_started()
 {
     local query_id="$1"
-    while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.query_log WHERE query_id='$query_id'") == 0 ]]; do
+    while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.query_log WHERE query_id='$query_id' AND current_database = currentDatabase()") == 0 ]]; do
         sleep 0.1;
         $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS;"
     done
@@ -35,4 +35,4 @@ echo "Cancelling query"
 kill_query "$sleep_query_id"
 
 $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS;"
-$CLICKHOUSE_CLIENT --query "SELECT exception FROM system.query_log WHERE query_id='$sleep_query_id'" | grep -oF "QUERY_WAS_CANCELLED"
+$CLICKHOUSE_CLIENT --query "SELECT exception FROM system.query_log WHERE query_id='$sleep_query_id' AND current_database = currentDatabase()" | grep -oF "QUERY_WAS_CANCELLED"

From a87a8e91cf1ae8bf449d4f7d95278ab79114d6a3 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Mon, 11 Dec 2023 14:46:12 +0000
Subject: [PATCH 129/213] Slightly better inference of unnamed tuples in JSON
 formats

---
 src/Formats/SchemaInferenceUtils.cpp               | 14 ++++++++++++--
 ...son_array_of_unnamed_tuples_inference.reference |  1 +
 ...2940_json_array_of_unnamed_tuples_inference.sql |  2 ++
 3 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.reference
 create mode 100644 tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.sql

diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp
index 94166aa9002..e6586d2d271 100644
--- a/src/Formats/SchemaInferenceUtils.cpp
+++ b/src/Formats/SchemaInferenceUtils.cpp
@@ -1247,11 +1247,22 @@ void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const F
             return;
         }
 
+        /// First, try to transform nested types without final transformations to see if there is a common type.
+        auto nested_types_copy = nested_types;
+        transformInferredTypesIfNeededImpl<true>(nested_types_copy, settings, json_info);
+        if (checkIfTypesAreEqual(nested_types_copy))
+        {
+            data_type = std::make_shared<DataTypeArray>(nested_types_copy.back());
+            transformFinalInferredJSONTypeIfNeededImpl(data_type, settings, json_info);
+            return;
+        }
+
+        /// Apply final transformation to nested types, and then try to find common type.
         for (auto & nested_type : nested_types)
             /// Don't change Nothing to String in nested types here, because we are not sure yet if it's Array or actual Tuple
             transformFinalInferredJSONTypeIfNeededImpl(nested_type, settings, json_info, /*remain_nothing_types=*/ true);
 
-        auto nested_types_copy = nested_types;
+        nested_types_copy = nested_types;
         transformInferredTypesIfNeededImpl<true>(nested_types_copy, settings, json_info);
         if (checkIfTypesAreEqual(nested_types_copy))
         {
@@ -1381,7 +1392,6 @@ DataTypePtr makeNullableRecursively(DataTypePtr type)
             return std::make_shared<DataTypeTuple>(std::move(nested_types), tuple_type->getElementNames());
 
         return std::make_shared<DataTypeTuple>(std::move(nested_types));
-
     }
 
     if (which.isMap())
diff --git a/tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.reference b/tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.reference
new file mode 100644
index 00000000000..aac3e471264
--- /dev/null
+++ b/tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.reference
@@ -0,0 +1 @@
+data	Array(Tuple(Nullable(Int64), Tuple(a Nullable(Int64), b Nullable(Int64)), Nullable(Int64), Nullable(String)))					
diff --git a/tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.sql b/tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.sql
new file mode 100644
index 00000000000..a8a7af1f96c
--- /dev/null
+++ b/tests/queries/0_stateless/02940_json_array_of_unnamed_tuples_inference.sql
@@ -0,0 +1,2 @@
+desc format(JSONEachRow, '{"data" : [[1, null, 3, null], [null, {"a" : 12, "b" : 12}, null, "string"], [null, null, 4, "string"]]}');
+

From ca262d6e10af713f5d296cf90d05e59ad00be50d Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Mon, 11 Dec 2023 15:55:34 +0100
Subject: [PATCH 130/213] Fix profile events.

---
 src/Common/ProfileEvents.cpp |  3 ++-
 src/Functions/sleep.h        | 13 ++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index f9ea03f4947..7d6abd587c5 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -456,7 +456,8 @@ The server successfully detected this situation and will download merged part fr
     M(ReadBufferSeekCancelConnection, "Number of seeks which lead to new connection (s3, http)") \
     \
     M(SleepFunctionCalls, "Number of times a sleep function (sleep, sleepEachRow) has been called.") \
-    M(SleepFunctionMicroseconds, "Time spent sleeping due to a sleep function call.") \
+    M(SleepFunctionMicroseconds, "Time set to sleep in a sleep function (sleep, sleepEachRow).") \
+    M(SleepFunctionElapsedMicroseconds, "Time spent sleeping in a sleep function (sleep, sleepEachRow).") \
     \
     M(ThreadPoolReaderPageCacheHit, "Number of times the read inside ThreadPoolReader was done from page cache.") \
     M(ThreadPoolReaderPageCacheHitBytes, "Number of bytes read inside ThreadPoolReader when it was done from page cache.") \
diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h
index 11b8e48a295..f5d3b6f29cd 100644
--- a/src/Functions/sleep.h
+++ b/src/Functions/sleep.h
@@ -17,6 +17,7 @@ namespace ProfileEvents
 {
 extern const Event SleepFunctionCalls;
 extern const Event SleepFunctionMicroseconds;
+extern const Event SleepFunctionElapsedMicroseconds;
 }
 
 namespace DB
@@ -134,14 +135,15 @@ public:
                         "The maximum sleep time is {} microseconds. Requested: {} microseconds per block (of size {})",
                         max_microseconds, microseconds, size);
 
-                while (microseconds)
+                UInt64 elapsed = 0;
+                while (elapsed < microseconds)
                 {
-                    UInt64 sleep_ms = microseconds;
+                    UInt64 sleep_time = microseconds - elapsed;
                     if (query_status)
-                        sleep_ms = std::min(sleep_ms, /* 1 second */ static_cast<size_t>(1000000));
+                        sleep_time = std::min(sleep_time, /* 1 second */ static_cast<size_t>(1000000));
 
-                    sleepForMicroseconds(sleep_ms);
-                    microseconds -= sleep_ms;
+                    sleepForMicroseconds(sleep_time);
+                    elapsed += sleep_time;
 
                     if (query_status && !query_status->checkTimeLimit())
                         break;
@@ -149,6 +151,7 @@ public:
 
                 ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count);
                 ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds);
+                ProfileEvents::increment(ProfileEvents::SleepFunctionElapsedMicroseconds, elapsed);
             }
         }
 

From ea09080c73102e118fc01c4286eff74bcf904573 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@clickhouse.com>
Date: Mon, 11 Dec 2023 15:50:27 +0000
Subject: [PATCH 131/213] fix

---
 src/Interpreters/HashJoin.cpp  | 62 +++++++++++++---------------------
 src/Interpreters/JoinUtils.cpp | 14 --------
 src/Interpreters/JoinUtils.h   |  1 -
 src/Storages/StorageJoin.cpp   |  3 +-
 4 files changed, 24 insertions(+), 56 deletions(-)

diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp
index 6e62e595caa..ce4236317ac 100644
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@@ -373,10 +373,20 @@ HashJoin::Type HashJoin::chooseMethod(JoinKind kind, const ColumnRawPtrs & key_c
         return Type::keys256;
 
     /// If there is single string key, use hash table of it's values.
-    if (keys_size == 1
-        && (typeid_cast<const ColumnString *>(key_columns[0])
-            || (isColumnConst(*key_columns[0]) && typeid_cast<const ColumnString *>(&assert_cast<const ColumnConst *>(key_columns[0])->getDataColumn()))))
-        return Type::key_string;
+    if (keys_size == 1)
+    {
+        auto is_string_column = [](const IColumn * column_ptr) -> bool
+        {
+            if (const auto * lc_column_ptr = typeid_cast<const ColumnLowCardinality *>(column_ptr))
+                return typeid_cast<const ColumnString *>(lc_column_ptr->getDictionary().getNestedColumn().get());
+            return typeid_cast<const ColumnString *>(column_ptr);
+        };
+
+        const auto * key_column = key_columns[0];
+        if (is_string_column(key_column) ||
+            (isColumnConst(*key_column) && is_string_column(assert_cast<const ColumnConst *>(key_column)->getDataColumnPtr().get())))
+            return Type::key_string;
+    }
 
     if (keys_size == 1 && typeid_cast<const ColumnFixedString *>(key_columns[0]))
         return Type::key_fixed_string;
@@ -791,7 +801,13 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
 
     size_t rows = source_block.rows();
 
-    ColumnPtrMap all_key_columns = JoinCommon::materializeColumnsInplaceMap(source_block, table_join->getAllNames(JoinTableSide::Right));
+    const auto & right_key_names = table_join->getAllNames(JoinTableSide::Right);
+    ColumnPtrMap all_key_columns(right_key_names.size());
+    for (const auto & column_name : right_key_names)
+    {
+        const auto & column = source_block.getByName(column_name).column;
+        all_key_columns[column_name] = recursiveRemoveLowCardinality(recursiveRemoveSparse(column->convertToFullColumnIfConst()));
+    }
 
     Block block_to_save = prepareRightBlock(source_block);
     if (shrink_blocks)
@@ -804,6 +820,8 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
             throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addBlockToJoin called when HashJoin locked to prevent updates");
 
         data->blocks_allocated_size += block_to_save.allocatedBytes();
+
+        assertBlocksHaveEqualStructure(data->sample_block, block_to_save, "Saved joined block structure mismatch");
         data->blocks.emplace_back(std::move(block_to_save));
         Block * stored_block = &data->blocks.back();
 
@@ -1061,33 +1079,6 @@ public:
         return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name);
     }
 
-    static void assertBlockEqualsStructureUpToLowCard(const Block & lhs_block, const Block & rhs_block)
-    {
-        if (lhs_block.columns() != rhs_block.columns())
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Different number of columns in blocks [{}] and [{}]",
-                lhs_block.dumpStructure(), rhs_block.dumpStructure());
-
-        for (size_t i = 0; i < lhs_block.columns(); ++i)
-        {
-            const auto & lhs = lhs_block.getByPosition(i);
-            const auto & rhs = rhs_block.getByPosition(i);
-            if (lhs.name != rhs.name)
-                throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})",
-                    lhs_block.dumpStructure(), rhs_block.dumpStructure(), lhs.name, rhs.name);
-
-            const auto & ltype = recursiveRemoveLowCardinality(lhs.type);
-            const auto & rtype = recursiveRemoveLowCardinality(rhs.type);
-            if (!ltype->equals(*rtype))
-                throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})",
-                    lhs_block.dumpStructure(), rhs_block.dumpStructure(), ltype->getName(), rtype->getName());
-
-            const auto & lcol = recursiveRemoveLowCardinality(lhs.column);
-            const auto & rcol = recursiveRemoveLowCardinality(rhs.column);
-            if (lcol->getDataType() != rcol->getDataType())
-                throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})",
-                    lhs_block.dumpStructure(), rhs_block.dumpStructure(), lcol->getDataType(), rcol->getDataType());
-        }
-    }
 
     template <bool has_defaults>
     void appendFromBlock(const Block & block, size_t row_num)
@@ -1095,13 +1086,6 @@ public:
         if constexpr (has_defaults)
             applyLazyDefaults();
 
-#ifndef NDEBUG
-        /// Like assertBlocksHaveEqualStructure but doesn't check low cardinality
-        assertBlockEqualsStructureUpToLowCard(sample_block, block);
-#else
-        UNUSED(assertBlockEqualsStructureUpToLowCard);
-#endif
-
         if (is_join_get)
         {
             /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin.
diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp
index be5ee80bd53..949a97d5748 100644
--- a/src/Interpreters/JoinUtils.cpp
+++ b/src/Interpreters/JoinUtils.cpp
@@ -315,20 +315,6 @@ ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names)
     return ptrs;
 }
 
-ColumnPtrMap materializeColumnsInplaceMap(const Block & block, const Names & names)
-{
-    ColumnPtrMap ptrs;
-    ptrs.reserve(names.size());
-
-    for (const auto & column_name : names)
-    {
-        ColumnPtr column = block.getByName(column_name).column;
-        ptrs[column_name] = materializeColumn(column);
-    }
-
-    return ptrs;
-}
-
 ColumnPtr materializeColumn(const Block & block, const String & column_name)
 {
     const auto & src_column = block.getByName(column_name).column;
diff --git a/src/Interpreters/JoinUtils.h b/src/Interpreters/JoinUtils.h
index 7daed6b7f7e..a88fca02bd8 100644
--- a/src/Interpreters/JoinUtils.h
+++ b/src/Interpreters/JoinUtils.h
@@ -70,7 +70,6 @@ ColumnPtr emptyNotNullableClone(const ColumnPtr & column);
 ColumnPtr materializeColumn(const Block & block, const String & name);
 Columns materializeColumns(const Block & block, const Names & names);
 ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names);
-ColumnPtrMap materializeColumnsInplaceMap(const Block & block, const Names & names);
 ColumnRawPtrs getRawPointers(const Columns & columns);
 void convertToFullColumnsInplace(Block & block);
 void convertToFullColumnsInplace(Block & block, const Names & names, bool change_type = true);
diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp
index 121d859a3f2..efe446a8ccd 100644
--- a/src/Storages/StorageJoin.cpp
+++ b/src/Storages/StorageJoin.cpp
@@ -535,8 +535,7 @@ private:
 #undef M
 
             default:
-                throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys in StorageJoin. Type: {}",
-                                static_cast<UInt32>(join->data->type));
+                throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys of type {} in StorageJoin", join->data->type);
         }
 
         if (!rows_added)

From 380e34226fbfbbf6dcbc22188a50bb5afdce42c3 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Mon, 11 Dec 2023 16:57:18 +0100
Subject: [PATCH 132/213] Fix compilation.

---
 src/Functions/sleep.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h
index f5d3b6f29cd..73d58ca6b5b 100644
--- a/src/Functions/sleep.h
+++ b/src/Functions/sleep.h
@@ -140,7 +140,7 @@ public:
                 {
                     UInt64 sleep_time = microseconds - elapsed;
                     if (query_status)
-                        sleep_time = std::min(sleep_time, /* 1 second */ static_cast<size_t>(1000000));
+                        sleep_time = std::min(sleep_time, /* 1 second */ static_cast<UInt64>(1000000));
 
                     sleepForMicroseconds(sleep_time);
                     elapsed += sleep_time;

From c76ceb29fd3619b755c631d0d0aa4a60a602a97b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 11 Dec 2023 17:21:02 +0100
Subject: [PATCH 133/213] Minor changes

---
 docker/test/clickbench/run.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index 82eb06dffcd..7357fa6df86 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -31,6 +31,8 @@ clickhouse-client --time < /create.sql
 
 # Run the queries
 
+set +x
+
 TRIES=3
 QUERY_NUM=1
 while read -r query; do
@@ -47,6 +49,8 @@ while read -r query; do
     QUERY_NUM=$((QUERY_NUM + 1))
 done < /queries.sql
 
+set -x
+
 clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'"
 
 echo -e "success\tClickBench finished" > /test_output/check_status.tsv

From 48ce04062125b93c0fa3e83785ef18f4d7f54dce Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Mon, 11 Dec 2023 17:30:23 +0100
Subject: [PATCH 134/213] Fix

---
 src/Databases/DDLLoadingDependencyVisitor.cpp |  7 +++
 src/Databases/DatabaseOrdinary.cpp            |  2 +
 .../StorageMaterializedPostgreSQL.cpp         |  3 -
 .../StorageMaterializedPostgreSQL.h           |  3 +
 .../configs/log_conf.xml                      |  6 ++
 .../test.py                                   | 56 +++++++++++++++++++
 6 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp
index 77a40f674fd..8f1b82941f6 100644
--- a/src/Databases/DDLLoadingDependencyVisitor.cpp
+++ b/src/Databases/DDLLoadingDependencyVisitor.cpp
@@ -1,6 +1,7 @@
 #include <Databases/DDLLoadingDependencyVisitor.h>
 #include <Databases/DDLDependencyVisitor.h>
 #include <Dictionaries/getDictionaryConfigurationFromAST.h>
+#include <Storages/PostgreSQL/StorageMaterializedPostgreSQL.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/misc.h>
 #include <Parsers/ASTCreateQuery.h>
@@ -131,6 +132,12 @@ void DDLLoadingDependencyVisitor::visit(const ASTStorage & storage, Data & data)
         extractTableNameFromArgument(*storage.engine, data, 3);
     else if (storage.engine->name == "Dictionary")
         extractTableNameFromArgument(*storage.engine, data, 0);
+    else if (storage.engine->name == "MaterializedPostgreSQL")
+    {
+        const auto * create_query = data.create_query->as<ASTCreateQuery>();
+        auto nested_table = toString(create_query->uuid) + StorageMaterializedPostgreSQL::NESTED_TABLE_SUFFIX;
+        data.dependencies.emplace(QualifiedTableName{ .database = create_query->getDatabase(), .table = nested_table });
+    }
 }
 
 
diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index 1f344551c5e..9a9dcf22c88 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -139,6 +139,8 @@ void DatabaseOrdinary::loadTableFromMetadata(
     assert(name.database == TSA_SUPPRESS_WARNING_FOR_READ(database_name));
     const auto & query = ast->as<const ASTCreateQuery &>();
 
+    LOG_TRACE(log, "Loading table {}", name.getFullName());
+
     try
     {
         auto [table_name, table] = createTableFromAST(
diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
index a287b96fe51..9cceb3ec503 100644
--- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
+++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
@@ -45,9 +45,6 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }
 
-static const auto NESTED_TABLE_SUFFIX = "_nested";
-static const auto TMP_SUFFIX = "_tmp";
-
 
 /// For the case of single storage.
 StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL(
diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h
index ca7b801cb7c..6bc0856e4dd 100644
--- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h
+++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h
@@ -63,6 +63,9 @@ namespace DB
 class StorageMaterializedPostgreSQL final : public IStorage, WithContext
 {
 public:
+    static constexpr auto NESTED_TABLE_SUFFIX = "_nested";
+    static constexpr auto TMP_SUFFIX = "_tmp";
+
     StorageMaterializedPostgreSQL(const StorageID & table_id_, ContextPtr context_,
                                 const String & postgres_database_name, const String & postgres_table_name);
 
diff --git a/tests/integration/test_postgresql_replica_database_engine_2/configs/log_conf.xml b/tests/integration/test_postgresql_replica_database_engine_2/configs/log_conf.xml
index 6cc1128e130..c9f6195a014 100644
--- a/tests/integration/test_postgresql_replica_database_engine_2/configs/log_conf.xml
+++ b/tests/integration/test_postgresql_replica_database_engine_2/configs/log_conf.xml
@@ -24,4 +24,10 @@
             <database>postgres_database</database>
         </postgres2>
     </named_collections>
+    <text_log>
+        <database>system</database>
+        <table>text_log</table>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <level>Test</level>
+    </text_log>
 </clickhouse>
diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py
index df72a2f705c..aa28db9a56d 100644
--- a/tests/integration/test_postgresql_replica_database_engine_2/test.py
+++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py
@@ -944,6 +944,62 @@ def test_symbols_in_publication_name(started_cluster):
     )
 
 
+def test_dependent_loading(started_cluster):
+    """After a restart, the `<uuid>_nested` table must start loading before the MaterializedPostgreSQL table that depends on it."""
+    table = "test_dependent_loading"
+    pg_manager.create_postgres_table(table)
+    instance.query(
+        f"INSERT INTO postgres_database.{table} SELECT number, number from numbers(0, 50)"
+    )
+
+    instance.query(
+        f"""
+        SET allow_experimental_materialized_postgresql_table=1;
+        CREATE TABLE {table} (key Int32, value Int32)
+        ENGINE=MaterializedPostgreSQL('{started_cluster.postgres_ip}:{started_cluster.postgres_port}', 'postgres_database', '{table}', 'postgres', 'mysecretpassword') ORDER BY key
+        """
+    )
+
+    check_tables_are_synchronized(
+        instance,
+        table,
+        postgres_database=pg_manager.get_default_database(),
+        materialized_database="default",
+    )
+
+    assert 50 == int(instance.query(f"SELECT count() FROM {table}"))
+
+    instance.restart_clickhouse()
+
+    check_tables_are_synchronized(
+        instance,
+        table,
+        postgres_database=pg_manager.get_default_database(),
+        materialized_database="default",
+    )
+
+    assert 50 == int(instance.query(f"SELECT count() FROM {table}"))
+
+    uuid = instance.query(
+        f"SELECT uuid FROM system.tables WHERE name='{table}' and database='default' limit 1"
+    ).strip()
+    nested_table = f"default.`{uuid}_nested`"
+    # NOTE(review): the return value is ignored here; confirm the pattern (it contains backticks) really matches the server log before turning this into an assert.
+    instance.contains_in_log(
+        f"Table default.{table} has 1 dependencies: {nested_table} (level 1)"
+    )
+    instance.query("SYSTEM FLUSH LOGS")
+    nested_time = instance.query(
+        f"SELECT event_time_microseconds FROM system.text_log WHERE message like 'Loading table default.{uuid}_nested' and message not like '%like%'"
+    ).strip()
+    time = instance.query(
+        f"SELECT event_time_microseconds FROM system.text_log WHERE message like 'Loading table default.{table}' and message not like '%like%'"
+    ).strip()
+    assert "1" == instance.query(
+        f"SELECT toDateTime64('{nested_time}', 6) < toDateTime64('{time}', 6)"
+    ).strip()
+
+
 if __name__ == "__main__":
     cluster.start()
     input("Cluster created, press any key to destroy...")

From c3a76fcc0888f0cb2db67a694157d9bb17aeb267 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 16 Nov 2023 13:25:31 +0000
Subject: [PATCH 135/213] Allow to infer numbers from strings in CSV format

---
 src/Core/Settings.h                                         | 1 +
 src/Formats/EscapingRuleUtils.cpp                           | 4 ++--
 src/Formats/FormatFactory.cpp                               | 1 +
 src/Formats/FormatSettings.h                                | 1 +
 .../02916_csv_infer_numbers_from_strings.reference          | 6 ++++++
 .../0_stateless/02916_csv_infer_numbers_from_strings.sql    | 4 ++++
 6 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/02916_csv_infer_numbers_from_strings.reference
 create mode 100644 tests/queries/0_stateless/02916_csv_infer_numbers_from_strings.sql

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 9601cd3e398..1e3b05bedb0 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -942,6 +942,7 @@ class IColumn;
     M(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, "The maximum rows of data to read for automatic schema inference", 0) \
     M(UInt64, input_format_max_bytes_to_read_for_schema_inference, 32 * 1024 * 1024, "The maximum bytes of data to read for automatic schema inference", 0) \
     M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \
+    M(Bool, input_format_csv_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference in CSV format", 0) \
     M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \
     M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
     M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp
index d429985e52a..9cc7cb3b89e 100644
--- a/src/Formats/EscapingRuleUtils.cpp
+++ b/src/Formats/EscapingRuleUtils.cpp
@@ -303,8 +303,8 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
                 /// Try to determine the type of value inside quotes
                 auto type = tryInferDataTypeForSingleField(data, format_settings);
 
-                /// If we couldn't infer any type or it's a number or tuple in quotes, we determine it as a string.
-                if (!type || isNumber(removeNullable(type)) || isTuple(type))
+                /// If we couldn't infer any type, or it's a tuple in quotes, or it's a number and csv.try_infer_numbers_from_strings = 0, we determine it as a string.
+                if (!type || isTuple(type) || (isNumber(type) && !format_settings.csv.try_infer_numbers_from_strings))
                     return std::make_shared<DataTypeString>();
 
                 return type;
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index b2ff9b1c3f7..1d62e58176b 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -74,6 +74,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
     format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter;
     format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns;
     format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values;
+    format_settings.csv.try_infer_numbers_from_strings = settings.input_format_csv_try_infer_numbers_from_strings;
     format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
     format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
     format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index d9e3a420502..8d5c044a311 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -164,6 +164,7 @@ struct FormatSettings
         bool allow_whitespace_or_tab_as_delimiter = false;
         bool allow_variable_number_of_columns = false;
         bool use_default_on_bad_values = false;
+        bool try_infer_numbers_from_strings = false; /// Mirrors input_format_csv_try_infer_numbers_from_strings, which is off by default.
     } csv;
 
     struct HiveText
diff --git a/tests/queries/0_stateless/02916_csv_infer_numbers_from_strings.reference b/tests/queries/0_stateless/02916_csv_infer_numbers_from_strings.reference
new file mode 100644
index 00000000000..f64557f1b70
--- /dev/null
+++ b/tests/queries/0_stateless/02916_csv_infer_numbers_from_strings.reference
@@ -0,0 +1,6 @@
+c1	Nullable(Int64)					
+c2	Nullable(Float64)					
+c3	Nullable(Bool)					
+c1	Nullable(String)					
+c2	Nullable(String)					
+c3	Nullable(String)					
diff --git a/tests/queries/0_stateless/02916_csv_infer_numbers_from_strings.sql b/tests/queries/0_stateless/02916_csv_infer_numbers_from_strings.sql
new file mode 100644
index 00000000000..713d3d7190c
--- /dev/null
+++ b/tests/queries/0_stateless/02916_csv_infer_numbers_from_strings.sql
@@ -0,0 +1,4 @@
+set input_format_csv_try_infer_numbers_from_strings=1;
+desc format(CSV, '"42","42.42","True"');
+desc format(CSV, '"42","42.42","True"\n"abc","def","ghk"');
+

From 1db000dae5e555723d9ad6a6d2ff1617db159af2 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 16 Nov 2023 13:31:04 +0000
Subject: [PATCH 136/213] Add docs

---
 docs/en/interfaces/formats.md                 |  1 +
 docs/en/interfaces/schema-inference.md        | 21 +++++++++++++++++++
 .../operations/settings/settings-formats.md   |  7 +++++++
 3 files changed, 29 insertions(+)

diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 57de0555bf6..836b1f2f637 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -478,6 +478,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
 - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
 - [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
 - [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
+- [input_format_csv_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_csv_try_infer_numbers_from_strings) - Try to infer numbers from string fields during schema inference. Default value - `false`.
 
 ## CSVWithNames {#csvwithnames}
 
diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md
index 0aadb09730a..34b98181355 100644
--- a/docs/en/interfaces/schema-inference.md
+++ b/docs/en/interfaces/schema-inference.md
@@ -834,6 +834,27 @@ $$)
 └──────────────┴───────────────┘
 ```
 
+#### CSV settings {#csv-settings}
+
+##### input_format_csv_try_infer_numbers_from_strings
+
+Enabling this setting allows inferring numbers from string values.
+
+This setting is disabled by default.
+
+**Example:**
+
+```sql
+SET input_format_csv_try_infer_numbers_from_strings = 1;
+DESC format(CSV, '"42","42.42"');
+```
+```response
+┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ c1   │ Nullable(Int64)   │              │                    │         │                  │                │
+│ c2   │ Nullable(Float64) │              │                    │         │                  │                │
+└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
 ### TSV/TSKV {#tsv-tskv}
 
 In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index 344e6dda680..3d76bd9df73 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -1130,6 +1130,13 @@ Result
 a  0  1971-01-01
 ```
 
+## input_format_csv_try_infer_numbers_from_strings {#input_format_csv_try_infer_numbers_from_strings}
+
+If enabled, during schema inference ClickHouse will try to infer numbers from string fields.
+It can be useful if CSV data contains quoted UInt64 numbers.
+
+Disabled by default.
+
 ## Values format settings {#values-format-settings}
 
 ### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}

From eaf2c347616913c2e12a87c445381ff1bdb25b00 Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@clickhouse.com>
Date: Mon, 11 Dec 2023 18:35:38 +0000
Subject: [PATCH 137/213] better lowcard handling in hash join

---
 src/Interpreters/HashJoin.cpp | 67 +++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 22 deletions(-)

diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp
index ce4236317ac..078797910af 100644
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@@ -271,6 +271,7 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
 
     materializeBlockInplace(right_table_keys);
     initRightBlockStructure(data->sample_block);
+    data->sample_block = prepareRightBlock(data->sample_block);
 
     JoinCommon::createMissedColumns(sample_block_with_columns_to_add);
 
@@ -806,7 +807,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
     for (const auto & column_name : right_key_names)
     {
         const auto & column = source_block.getByName(column_name).column;
-        all_key_columns[column_name] = recursiveRemoveLowCardinality(recursiveRemoveSparse(column->convertToFullColumnIfConst()));
+        all_key_columns[column_name] = recursiveRemoveSparse(column->convertToFullColumnIfConst())->convertToFullColumnIfLowCardinality();
     }
 
     Block block_to_save = prepareRightBlock(source_block);
@@ -821,7 +822,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
 
         data->blocks_allocated_size += block_to_save.allocatedBytes();
 
-        assertBlocksHaveEqualStructure(data->sample_block, block_to_save, "Saved joined block structure mismatch");
+        assertBlocksHaveEqualStructure(data->sample_block, block_to_save, "joined block");
         data->blocks.emplace_back(std::move(block_to_save));
         Block * stored_block = &data->blocks.back();
 
@@ -1030,16 +1031,15 @@ public:
     };
 
     AddedColumns(
+        const Block & left_block,
         const Block & block_with_columns_to_add,
-        const Block & block,
         const Block & saved_block_sample,
         const HashJoin & join,
         std::vector<JoinOnKeyColumns> && join_on_keys_,
         bool is_asof_join,
         bool is_join_get_)
         : join_on_keys(join_on_keys_)
-        , rows_to_add(block.rows())
-        , sample_block(saved_block_sample)
+        , rows_to_add(left_block.rows())
         , is_join_get(is_join_get_)
     {
         size_t num_columns_to_add = block_with_columns_to_add.columns();
@@ -1056,7 +1056,7 @@ public:
             /// because it uses not qualified right block column names
             auto qualified_name = join.getTableJoin().renamedRightColumnName(src_column.name);
             /// Don't insert column if it's in left block
-            if (!block.has(qualified_name))
+            if (!left_block.has(qualified_name))
                 addColumn(src_column, qualified_name);
         }
 
@@ -1070,6 +1070,17 @@ public:
 
         for (auto & tn : type_name)
             right_indexes.push_back(saved_block_sample.getPositionByName(tn.name));
+
+        nullable_column_ptrs.resize(right_indexes.size(), nullptr);
+        for (size_t j = 0; j < right_indexes.size(); ++j)
+        {
+            /** If it's joinGetOrNull, we will have nullable columns in result block
+              * even if right column is not nullable in storage (saved_block_sample).
+              */
+            const auto & saved_column = saved_block_sample.getByPosition(right_indexes[j]).column;
+            if (columns[j]->isNullable() && !saved_column->isNullable())
+                nullable_column_ptrs[j] = typeid_cast<ColumnNullable *>(columns[j].get());
+        }
     }
 
     size_t size() const { return columns.size(); }
@@ -1086,32 +1097,43 @@ public:
         if constexpr (has_defaults)
             applyLazyDefaults();
 
+#ifndef NDEBUG
+        for (size_t j = 0; j < right_indexes.size(); ++j)
+        {
+            const auto & column_from_block = block.getByPosition(right_indexes[j]);
+            const auto * dest_column = columns[j].get();
+            if (auto * nullable_col = nullable_column_ptrs[j])
+            {
+                if (!is_join_get)
+                    throw Exception(ErrorCodes::LOGICAL_ERROR,
+                        "Columns {} and {} can have different nullability only in joinGetOrNull",
+                        dest_column->getName(), column_from_block.column->getName());
+                dest_column = nullable_col->getNestedColumnPtr().get();
+            }
+            if (!dest_column->structureEquals(*column_from_block.column))
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} are not structure equals", dest_column->getName(), column_from_block.column->getName());
+        }
+#endif
+
         if (is_join_get)
         {
-            /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin.
-            for (size_t j = 0, size = right_indexes.size(); j < size; ++j)
+            size_t right_indexes_size = right_indexes.size();
+            for (size_t j = 0; j < right_indexes_size; ++j)
             {
                 const auto & column_from_block = block.getByPosition(right_indexes[j]);
-                if (auto * nullable_col = typeid_cast<ColumnNullable *>(columns[j].get());
-                    nullable_col && !column_from_block.column->isNullable())
+                if (auto * nullable_col = nullable_column_ptrs[j])
                     nullable_col->insertFromNotNullable(*column_from_block.column, row_num);
-                else if (auto * lowcard_col = typeid_cast<ColumnLowCardinality *>(columns[j].get());
-                         lowcard_col && !typeid_cast<const ColumnLowCardinality *>(column_from_block.column.get()))
-                    lowcard_col->insertFromFullColumn(*column_from_block.column, row_num);
                 else
                     columns[j]->insertFrom(*column_from_block.column, row_num);
             }
         }
         else
         {
-            for (size_t j = 0, size = right_indexes.size(); j < size; ++j)
+            size_t right_indexes_size = right_indexes.size();
+            for (size_t j = 0; j < right_indexes_size; ++j)
             {
                 const auto & column_from_block = block.getByPosition(right_indexes[j]);
-                if (auto * lowcard_col = typeid_cast<ColumnLowCardinality *>(columns[j].get());
-                    lowcard_col && !typeid_cast<const ColumnLowCardinality *>(column_from_block.column.get()))
-                    lowcard_col->insertFromFullColumn(*column_from_block.column, row_num);
-                else
-                    columns[j]->insertFrom(*column_from_block.column, row_num);
+                columns[j]->insertFrom(*column_from_block.column, row_num);
             }
         }
     }
@@ -1142,11 +1164,12 @@ public:
 private:
     std::vector<TypeAndName> type_name;
     MutableColumns columns;
+    std::vector<ColumnNullable *> nullable_column_ptrs;
+
     std::vector<size_t> right_indexes;
     size_t lazy_defaults_count = 0;
     /// for ASOF
     const IColumn * left_asof_key = nullptr;
-    Block sample_block;
 
     bool is_join_get;
 
@@ -1601,8 +1624,8 @@ void HashJoin::joinBlockImpl(
       * For ASOF, the last column is used as the ASOF column
       */
     AddedColumns added_columns(
-        block_with_columns_to_add,
         block,
+        block_with_columns_to_add,
         savedBlockSample(),
         *this,
         std::move(join_on_keys),
@@ -1811,7 +1834,7 @@ ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block
     std::vector<const MapsOne *> maps_vector;
     maps_vector.push_back(&std::get<MapsOne>(data->maps[0]));
     joinBlockImpl<JoinKind::Left, JoinStrictness::Any>(
-        keys, block_with_columns_to_add, maps_vector, true);
+        keys, block_with_columns_to_add, maps_vector, /* is_join_get = */ true);
     return keys.getByPosition(keys.columns() - 1);
 }
 

From 76be22e8e87630eddd9a97363fdcb8291acdedd5 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Mon, 11 Dec 2023 19:55:14 +0100
Subject: [PATCH 138/213] Fix build without libpqxx

---
 src/Databases/DDLLoadingDependencyVisitor.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp
index 8f1b82941f6..b8690125aaa 100644
--- a/src/Databases/DDLLoadingDependencyVisitor.cpp
+++ b/src/Databases/DDLLoadingDependencyVisitor.cpp
@@ -1,7 +1,10 @@
 #include <Databases/DDLLoadingDependencyVisitor.h>
 #include <Databases/DDLDependencyVisitor.h>
 #include <Dictionaries/getDictionaryConfigurationFromAST.h>
+#include "config.h"
+#if USE_LIBPQXX
 #include <Storages/PostgreSQL/StorageMaterializedPostgreSQL.h>
+#endif
 #include <Interpreters/Context.h>
 #include <Interpreters/misc.h>
 #include <Parsers/ASTCreateQuery.h>
@@ -132,12 +135,14 @@ void DDLLoadingDependencyVisitor::visit(const ASTStorage & storage, Data & data)
         extractTableNameFromArgument(*storage.engine, data, 3);
     else if (storage.engine->name == "Dictionary")
         extractTableNameFromArgument(*storage.engine, data, 0);
+#if USE_LIBPQXX
     else if (storage.engine->name == "MaterializedPostgreSQL")
     {
         const auto * create_query = data.create_query->as<ASTCreateQuery>();
         auto nested_table = toString(create_query->uuid) + StorageMaterializedPostgreSQL::NESTED_TABLE_SUFFIX;
         data.dependencies.emplace(QualifiedTableName{ .database = create_query->getDatabase(), .table = nested_table });
     }
+#endif
 }
 
 

From 30dabd42d35e4dea6805b6340f3fd5edd8561d59 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 11 Dec 2023 19:37:42 +0000
Subject: [PATCH 139/213] One lookup for 2 checks

(1) if it's the same part
(2) if announced part has intersections with parts in working set
---
 .../ParallelReplicasReadingCoordinator.cpp    | 38 ++++++++++++-------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
index 44d10eda21e..d81f5dd41ce 100644
--- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
+++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
@@ -164,22 +164,34 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc
     for (auto && part_ranges: announcement.description)
     {
         Part part{.description = std::move(part_ranges), .replicas = {announcement.replica_num}};
-        auto the_same_it = all_parts_to_read.find(part);
-        /// We have the same part - add the info about presence on current replica to it
-        if (the_same_it != all_parts_to_read.end())
+
+        auto it = std::lower_bound(cbegin(all_parts_to_read), cend(all_parts_to_read), part);
+        if (it != all_parts_to_read.cend())
         {
-            the_same_it->replicas.insert(announcement.replica_num);
-            continue;
+            const MergeTreePartInfo & announced_part = part.description.info;
+            const MergeTreePartInfo & found_part = it->description.info;
+            if (found_part == announced_part)
+            {
+                /// We have the same part - add the info about presence on current replica
+                it->replicas.insert(announcement.replica_num);
+                continue;
+            }
+            else
+            {
+                /// check if it is covering or covered part
+                /// need to compare with 2 nearest parts in set, - lesser and greater than the part from the announcement
+                bool is_disjoint = found_part.isDisjoint(announced_part);
+                if (it != all_parts_to_read.cbegin() && is_disjoint)
+                {
+                    const MergeTreePartInfo & lesser_part_info = (--it)->description.info;
+                    is_disjoint &= lesser_part_info.isDisjoint(announced_part);
+                }
+                if (!is_disjoint)
+                    continue;
+            }
         }
 
-        auto covering_or_the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(),
-            [&part] (const Part & other) { return !other.description.info.isDisjoint(part.description.info); });
-
-        /// It is covering part or we have covering - skip it
-        if (covering_or_the_same_it != all_parts_to_read.end())
-            continue;
-
-        auto [insert_it, _] = all_parts_to_read.emplace(part);
+        auto [insert_it, _] = all_parts_to_read.emplace(std::move(part));
         parts_diff.push_back(insert_it);
     }
 

From 1e0d8e098fcbf2a70194ce4b8a87465566140de0 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 11 Dec 2023 20:48:48 +0100
Subject: [PATCH 140/213] Fix building Rust with Musl

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 cmake/target.cmake  | 5 -----
 rust/CMakeLists.txt | 4 ++++
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/cmake/target.cmake b/cmake/target.cmake
index 1680715d15f..0d6993142b3 100644
--- a/cmake/target.cmake
+++ b/cmake/target.cmake
@@ -73,8 +73,3 @@ if (CMAKE_CROSSCOMPILING)
 
     message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}")
 endif ()
-
-if (USE_MUSL)
-    # Does not work for unknown reason
-    set (ENABLE_RUST OFF CACHE INTERNAL "")
-endif ()
diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt
index 6aa25e95679..5ea806baa3b 100644
--- a/rust/CMakeLists.txt
+++ b/rust/CMakeLists.txt
@@ -14,6 +14,10 @@ macro(configure_rustc)
         set(RUST_CFLAGS "${RUST_CFLAGS} --sysroot ${CMAKE_SYSROOT}")
     endif()
 
+    if (USE_MUSL)
+        set(RUST_CXXFLAGS "${RUST_CXXFLAGS} -D_LIBCPP_HAS_MUSL_LIBC=1")
+    endif ()
+
     if(CCACHE_EXECUTABLE MATCHES "/sccache$")
         message(STATUS "Using RUSTC_WRAPPER: ${CCACHE_EXECUTABLE}")
         set(RUSTCWRAPPER "rustc-wrapper = \"${CCACHE_EXECUTABLE}\"")

From c007025ee3c06779b3a69903f6b10de21bd20bf1 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 11 Dec 2023 19:30:33 +0000
Subject: [PATCH 141/213] Fix flaky
 test_parallel_replicas_distributed_read_from_all

---
 .../test_parallel_replicas_distributed_read_from_all/test.py    | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
index fa1dfbefe52..88dabedb3f5 100644
--- a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
+++ b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
@@ -143,6 +143,8 @@ def test_read_equally_from_each_replica(start_cluster, prefer_localhost_replica)
     nodes[0].query(f"system start fetches {table_name}")
     nodes[1].query(f"system start fetches {table_name}")
     nodes[2].query(f"system start fetches {table_name}")
+    # ensure that replica in sync before querying it to get stable result
+    nodes[0].query(f"system sync  replica {table_name} strict")
     assert (
         nodes[0].query(
             f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d"

From bfc2a7bb752cdb231ebfd53b0fc7d71928ba99ae Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 11 Dec 2023 21:27:48 +0100
Subject: [PATCH 142/213] Update cache only after conflicts

---
 src/Storages/MergeTree/AsyncBlockIDsCache.cpp | 32 +++++++------------
 src/Storages/MergeTree/AsyncBlockIDsCache.h   |  4 +--
 src/Storages/MergeTree/MergeTreeSettings.h    |  2 +-
 .../MergeTree/ReplicatedMergeTreeSink.cpp     |  5 ++-
 4 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp
index 4f3a8f16366..c2e7b4cbad5 100644
--- a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp
+++ b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp
@@ -18,6 +18,7 @@ namespace CurrentMetrics
 namespace DB
 {
 
+static constexpr int FAILURE_RETRY_MS = 3000;
 
 template <typename TStorage>
 struct AsyncBlockIDsCache<TStorage>::Cache : public std::unordered_set<String>
@@ -33,24 +34,7 @@ template <typename TStorage>
 std::vector<String> AsyncBlockIDsCache<TStorage>::getChildren()
 {
     auto zookeeper = storage.getZooKeeper();
-
-    auto watch_callback = [last_time = this->last_updatetime.load()
-                           , my_update_min_interval = this->update_min_interval
-                           , my_task = task->shared_from_this()](const Coordination::WatchResponse &)
-    {
-        auto now = std::chrono::steady_clock::now();
-        if (now - last_time < my_update_min_interval)
-        {
-            std::chrono::milliseconds sleep_time = std::chrono::duration_cast<std::chrono::milliseconds>(my_update_min_interval - (now - last_time));
-            my_task->scheduleAfter(sleep_time.count());
-        }
-        else
-            my_task->schedule();
-    };
-    std::vector<String> children;
-    Coordination::Stat stat;
-    zookeeper->tryGetChildrenWatch(path, children, &stat, watch_callback);
-    return children;
+    return zookeeper->getChildren(path);
 }
 
 template <typename TStorage>
@@ -69,18 +53,16 @@ try
         ++version;
     }
     cv.notify_all();
-    last_updatetime = std::chrono::steady_clock::now();
 }
 catch (...)
 {
     LOG_INFO(log, "Updating async block ids cache failed. Reason: {}", getCurrentExceptionMessage(false));
-    task->scheduleAfter(update_min_interval.count());
+    task->scheduleAfter(FAILURE_RETRY_MS);
 }
 
 template <typename TStorage>
 AsyncBlockIDsCache<TStorage>::AsyncBlockIDsCache(TStorage & storage_)
     : storage(storage_)
-    , update_min_interval(storage.getSettings()->async_block_ids_cache_min_update_interval_ms)
     , update_wait(storage.getSettings()->async_block_ids_cache_update_wait_ms)
     , path(storage.getZooKeeperPath() + "/async_blocks")
     , log_name(storage.getStorageID().getFullTableName() + " (AsyncBlockIDsCache)")
@@ -96,6 +78,14 @@ void AsyncBlockIDsCache<TStorage>::start()
         task->activateAndSchedule();
 }
 
+template <typename TStorage>
+void AsyncBlockIDsCache<TStorage>::triggerCacheUpdate()
+{
+    /// Trigger task update
+    if (!task->schedule())
+        LOG_TRACE(log, "Task is already scheduled, will wait for update for {}ms", update_wait.count());
+}
+
 /// Caller will keep the version of last call. When the caller calls again, it will wait util gets a newer version.
 template <typename TStorage>
 Strings AsyncBlockIDsCache<TStorage>::detectConflicts(const Strings & paths, UInt64 & last_version)
diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.h b/src/Storages/MergeTree/AsyncBlockIDsCache.h
index 8ce65ec4927..d505b0fe8be 100644
--- a/src/Storages/MergeTree/AsyncBlockIDsCache.h
+++ b/src/Storages/MergeTree/AsyncBlockIDsCache.h
@@ -27,12 +27,12 @@ public:
 
     Strings detectConflicts(const Strings & paths, UInt64 & last_version);
 
+    void triggerCacheUpdate();
+
 private:
 
     TStorage & storage;
 
-    std::atomic<std::chrono::steady_clock::time_point> last_updatetime;
-    const std::chrono::milliseconds update_min_interval;
     const std::chrono::milliseconds update_wait;
 
     std::mutex mu;
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index c71951503c8..b921458feab 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -95,7 +95,6 @@ struct Settings;
     M(UInt64, replicated_deduplication_window_seconds, 7 * 24 * 60 * 60 /* one week */, "Similar to \"replicated_deduplication_window\", but determines old blocks by their lifetime. Hash of an inserted block will be deleted (and the block will not be deduplicated after) if it outside of one \"window\". You can set very big replicated_deduplication_window to avoid duplicating INSERTs during that period of time.", 0) \
     M(UInt64, replicated_deduplication_window_for_async_inserts, 10000, "How many last hash values of async_insert blocks should be kept in ZooKeeper (old blocks will be deleted).", 0) \
     M(UInt64, replicated_deduplication_window_seconds_for_async_inserts, 7 * 24 * 60 * 60 /* one week */, "Similar to \"replicated_deduplication_window_for_async_inserts\", but determines old blocks by their lifetime. Hash of an inserted block will be deleted (and the block will not be deduplicated after) if it outside of one \"window\". You can set very big replicated_deduplication_window to avoid duplicating INSERTs during that period of time.", 0) \
-    M(Milliseconds, async_block_ids_cache_min_update_interval_ms, 1000, "Minimum interval between updates of async_block_ids_cache", 0) \
     M(Milliseconds, async_block_ids_cache_update_wait_ms, 100, "How long each insert iteration will wait for async_block_ids_cache update", 0) \
     M(Bool, use_async_block_ids_cache, true, "Use in-memory cache to filter duplicated async inserts based on block ids", 0) \
     M(UInt64, max_replicated_logs_to_keep, 1000, "How many records may be in log, if there is inactive replica. Inactive replica becomes lost when when this number exceed.", 0) \
@@ -215,6 +214,7 @@ struct Settings;
     MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Bool, use_metadata_cache, false) \
     MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, merge_tree_enable_clear_old_broken_detached, 0) \
     MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds, 1ULL * 3600 * 24 * 30) \
+    MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, async_block_ids_cache_min_update_interval_ms, 1000) \
 
     /// Settings that should not change after the creation of a table.
     /// NOLINTNEXTLINE
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
index 4fa473da813..9fb575c0213 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
@@ -456,7 +456,7 @@ void ReplicatedMergeTreeSinkImpl<true>::finishDelayedChunk(const ZooKeeperWithFa
     if (!delayed_chunk)
         return;
 
-    for (auto & partition: delayed_chunk->partitions)
+    for (auto & partition : delayed_chunk->partitions)
     {
         int retry_times = 0;
         /// users may have lots of same inserts. It will be helpful to deduplicate in advance.
@@ -469,6 +469,7 @@ void ReplicatedMergeTreeSinkImpl<true>::finishDelayedChunk(const ZooKeeperWithFa
         }
 
         /// reset the cache version to zero for every partition write.
+        /// Version zero allows to avoid wait on first iteration
         cache_version = 0;
         while (true)
         {
@@ -476,6 +477,8 @@ void ReplicatedMergeTreeSinkImpl<true>::finishDelayedChunk(const ZooKeeperWithFa
             auto conflict_block_ids = commitPart(zookeeper, partition.temp_part.part, partition.block_id, delayed_chunk->replicas_num, false).first;
             if (conflict_block_ids.empty())
                 break;
+
+            storage.async_block_ids_cache.triggerCacheUpdate();
             ++retry_times;
             LOG_DEBUG(log, "Found duplicate block IDs: {}, retry times {}", toString(conflict_block_ids), retry_times);
             /// partition clean conflict

From 7cdc3d29fbc6693cf98eac943723cb2bbb2f4fed Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Mon, 11 Dec 2023 22:29:51 +0100
Subject: [PATCH 143/213] Get rid of tiny function

---
 src/Storages/MergeTree/AsyncBlockIDsCache.cpp      | 14 +++++---------
 src/Storages/MergeTree/AsyncBlockIDsCache.h        |  2 --
 src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp |  2 +-
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp
index c2e7b4cbad5..cc3bc8fc2a8 100644
--- a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp
+++ b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp
@@ -30,18 +30,12 @@ struct AsyncBlockIDsCache<TStorage>::Cache : public std::unordered_set<String>
     {}
 };
 
-template <typename TStorage>
-std::vector<String> AsyncBlockIDsCache<TStorage>::getChildren()
-{
-    auto zookeeper = storage.getZooKeeper();
-    return zookeeper->getChildren(path);
-}
-
 template <typename TStorage>
 void AsyncBlockIDsCache<TStorage>::update()
 try
 {
-    std::vector<String> paths = getChildren();
+    auto zookeeper = storage.getZooKeeper();
+    std::vector<String> paths = zookeeper->getChildren(path);
     std::unordered_set<String> set;
     for (String & p : paths)
     {
@@ -81,7 +75,9 @@ void AsyncBlockIDsCache<TStorage>::start()
 template <typename TStorage>
 void AsyncBlockIDsCache<TStorage>::triggerCacheUpdate()
 {
-    /// Trigger task update
+    /// Trigger task update. Watch-based updates may produce a lot of
+    /// redundant work in case of multiple replicas, so we use manually controlled updates
+    /// in case of duplicates
     if (!task->schedule())
         LOG_TRACE(log, "Task is already scheduled, will wait for update for {}ms", update_wait.count());
 }
diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.h b/src/Storages/MergeTree/AsyncBlockIDsCache.h
index d505b0fe8be..38c38da0033 100644
--- a/src/Storages/MergeTree/AsyncBlockIDsCache.h
+++ b/src/Storages/MergeTree/AsyncBlockIDsCache.h
@@ -14,8 +14,6 @@ class AsyncBlockIDsCache
     struct Cache;
     using CachePtr = std::shared_ptr<Cache>;
 
-    std::vector<String> getChildren();
-
     void update();
 
 public:
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
index 9fb575c0213..90cbe6b797d 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
@@ -307,7 +307,7 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
         auto profile_events_scope = std::make_unique<ProfileEventsScope>(&part_counters);
 
         /// Some merging algorithms can mofidy the block which loses the information about the async insert offsets
-        /// when preprocessing or filtering data for asnyc inserts deduplication we want to use the initial, unmerged block
+        /// when preprocessing or filtering data for async inserts deduplication we want to use the initial, unmerged block
         std::optional<BlockWithPartition> unmerged_block;
 
         if constexpr (async_insert)

From da43d49ffc7b637baca40fd9fbdac606a4033ac4 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 11 Dec 2023 21:30:25 +0000
Subject: [PATCH 144/213] Fix: sync is failed with stopped merges

---
 .../test.py                                               | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
index 88dabedb3f5..58abe1247e6 100644
--- a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
+++ b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
@@ -144,10 +144,14 @@ def test_read_equally_from_each_replica(start_cluster, prefer_localhost_replica)
     nodes[1].query(f"system start fetches {table_name}")
     nodes[2].query(f"system start fetches {table_name}")
     # ensure that replica in sync before querying it to get stable result
-    nodes[0].query(f"system sync  replica {table_name} strict")
+    nodes[0].query(f"system start merges {table_name}")
+    nodes[0].query(f"system sync  replica {table_name}")
     assert (
         nodes[0].query(
-            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d"
+            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d",
+            settings={
+                "allow_experimental_parallel_reading_from_replicas": 0,
+            }
         )
         == expected_result
     )

From e95041a3afcdbc8473b44f537eca254e2a413e7d Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Mon, 11 Dec 2023 21:40:23 +0000
Subject: [PATCH 145/213] Automatic style fix

---
 .../test_parallel_replicas_distributed_read_from_all/test.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
index 58abe1247e6..7e12da956ea 100644
--- a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
+++ b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
@@ -151,7 +151,7 @@ def test_read_equally_from_each_replica(start_cluster, prefer_localhost_replica)
             f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d",
             settings={
                 "allow_experimental_parallel_reading_from_replicas": 0,
-            }
+            },
         )
         == expected_result
     )

From fcb8ab9b6375907cf0b0e5ee498f11972925d503 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 00:42:23 +0300
Subject: [PATCH 146/213] Update tests/ci/functional_test_check.py

Co-authored-by: Mikhail f. Shiryaev <felixoid@clickhouse.com>
---
 tests/ci/functional_test_check.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py
index c8b3e42eed4..0dea2c5476f 100644
--- a/tests/ci/functional_test_check.py
+++ b/tests/ci/functional_test_check.py
@@ -169,7 +169,7 @@ def process_results(
     status = []
     status_path = result_directory / "check_status.tsv"
     if status_path.exists():
-        logging.info("Found check_status.tsv")
+        logging.info("Found %s", status_path.name)
         with open(status_path, "r", encoding="utf-8") as status_file:
             status = list(csv.reader(status_file, delimiter="\t"))
 

From b9b6e7584e547a358098a6a174f302cb2d7b7774 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 00:42:41 +0300
Subject: [PATCH 147/213] Update tests/ci/fast_test_check.py

Co-authored-by: Mikhail f. Shiryaev <felixoid@clickhouse.com>
---
 tests/ci/fast_test_check.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py
index f5c7342d6f4..265fc81ccb3 100644
--- a/tests/ci/fast_test_check.py
+++ b/tests/ci/fast_test_check.py
@@ -72,7 +72,7 @@ def process_results(result_directory: Path) -> Tuple[str, str, TestResults]:
     status = []
     status_path = result_directory / "check_status.tsv"
     if status_path.exists():
-        logging.info("Found check_status.tsv")
+        logging.info("Found %s", status_path.name)
         with open(status_path, "r", encoding="utf-8") as status_file:
             status = list(csv.reader(status_file, delimiter="\t"))
     if len(status) != 1 or len(status[0]) != 2:

From a3262003f3bba9921e160d1abf8a05027b92a69d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 00:42:47 +0300
Subject: [PATCH 148/213] Update tests/ci/integration_test_check.py

Co-authored-by: Mikhail f. Shiryaev <felixoid@clickhouse.com>
---
 tests/ci/integration_test_check.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py
index b22aa08354c..e49cec6d694 100644
--- a/tests/ci/integration_test_check.py
+++ b/tests/ci/integration_test_check.py
@@ -118,7 +118,7 @@ def process_results(
     status = []
     status_path = result_directory / "check_status.tsv"
     if status_path.exists():
-        logging.info("Found check_status.tsv")
+        logging.info("Found %s", status_path.name)
         with open(status_path, "r", encoding="utf-8") as status_file:
             status = list(csv.reader(status_file, delimiter="\t"))
 

From 0c81892463428e06b30b1d7a3a9729f86618a3a3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 00:43:06 +0300
Subject: [PATCH 149/213] Update tests/ci/clickbench.py

Co-authored-by: Mikhail f. Shiryaev <felixoid@clickhouse.com>
---
 tests/ci/clickbench.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 9c700adb398..359c10eeb9d 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -94,7 +94,7 @@ def process_results(
         results_path = result_directory / "test_results.tsv"
 
         if results_path.exists():
-            logging.info("Found test_results.tsv")
+            logging.info("Found %s", results_path.name)
         else:
             logging.info("Files in result folder %s", os.listdir(result_directory))
             return "error", "Not found test_results.tsv", test_results, additional_files

From bb4d9f7ae5fc7cef1442ac2d27657fcbad92f916 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Mon, 11 Dec 2023 15:27:41 +0000
Subject: [PATCH 150/213] Refactor toStartOfInterval()

---
 src/Functions/toStartOfInterval.cpp | 200 ++++++++++++++--------------
 1 file changed, 100 insertions(+), 100 deletions(-)

diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp
index 48f60dddb33..ea0ad139481 100644
--- a/src/Functions/toStartOfInterval.cpp
+++ b/src/Functions/toStartOfInterval.cpp
@@ -1,7 +1,7 @@
-#include <base/arithmeticOverflow.h>
-#include <Common/DateLUTImpl.h>
 #include <Columns/ColumnsDateTime.h>
 #include <Columns/ColumnsNumber.h>
+#include <Common/DateLUTImpl.h>
+#include <Common/IntervalKind.h>
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeDate32.h>
 #include <DataTypes/DataTypeDateTime.h>
@@ -11,6 +11,7 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/IFunction.h>
 #include <IO/WriteHelpers.h>
+#include <base/arithmeticOverflow.h>
 
 
 namespace DB
@@ -24,9 +25,6 @@ namespace ErrorCodes
 }
 
 
-namespace
-{
-
 class FunctionToStartOfInterval : public IFunction
 {
 public:
@@ -34,86 +32,90 @@ public:
 
     static constexpr auto name = "toStartOfInterval";
     String getName() const override { return name; }
-
     bool isVariadic() const override { return true; }
     size_t getNumberOfArguments() const override { return 0; }
     bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
-
     bool useDefaultImplementationForConstants() const override { return true; }
     ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
-
     bool hasInformationAboutMonotonicity() const override { return true; }
-    Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override
-    {
-        return { .is_monotonic = true, .is_always_monotonic = true };
-    }
+    Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override { return { .is_monotonic = true, .is_always_monotonic = true }; }
 
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
-        bool first_argument_is_date = false;
+        bool value_is_date = false;
         auto check_first_argument = [&]
         {
-            if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
-                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. "
-                    "Should be a date or a date with time", arguments[0].type->getName(), getName());
-            first_argument_is_date = isDate(arguments[0].type);
+            const DataTypePtr & type_arg1 = arguments[0].type;
+            if (!isDate(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1))
+                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Illegal type {} of 1st argument of function {}, expected a Date, DateTime or DateTime64",
+                    type_arg1->getName(), getName());
+            value_is_date = isDate(type_arg1);
         };
 
         const DataTypeInterval * interval_type = nullptr;
-        bool result_type_is_date = false;
-        bool result_type_is_datetime = false;
-        bool result_type_is_datetime_64 = false;
-        auto check_interval_argument = [&]
+        enum class ResultType
         {
-            interval_type = checkAndGetDataType<DataTypeInterval>(arguments[1].type.get());
+            Date,
+            DateTime,
+            DateTime64
+        };
+        ResultType result_type;
+        auto check_second_argument = [&]
+        {
+            const DataTypePtr & type_arg2 = arguments[1].type;
+
+            interval_type = checkAndGetDataType<DataTypeInterval>(type_arg2.get());
             if (!interval_type)
-                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. "
-                    "Should be an interval of time", arguments[1].type->getName(), getName());
+                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Illegal type {} of 2nd argument of function {}, expected a time interval",
+                    type_arg2->getName(), getName());
+
             switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case)
             {
                 case IntervalKind::Nanosecond:
                 case IntervalKind::Microsecond:
                 case IntervalKind::Millisecond:
-                    result_type_is_datetime_64 = true;
+                    result_type = ResultType::DateTime64;
                     break;
                 case IntervalKind::Second:
                 case IntervalKind::Minute:
                 case IntervalKind::Hour:
-                case IntervalKind::Day:
-                    result_type_is_datetime = true;
+                case IntervalKind::Day: /// weird why Day leads to DateTime but too afraid to change it
+                    result_type = ResultType::DateTime;
                     break;
                 case IntervalKind::Week:
                 case IntervalKind::Month:
                 case IntervalKind::Quarter:
                 case IntervalKind::Year:
-                    result_type_is_date = true;
+                    result_type = ResultType::Date;
                     break;
             }
         };
 
-        auto check_timezone_argument = [&]
+        auto check_third_argument = [&]
         {
-            if (!isString(arguments[2].type))
-                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. "
-                    "This argument is optional and must be a constant string with timezone name",
-                    arguments[2].type->getName(), getName());
-            if (first_argument_is_date && result_type_is_date)
+            const DataTypePtr & type_arg3 = arguments[2].type;
+            if (!isString(type_arg3))
                 throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                    "The timezone argument of function {} with interval type {} is allowed only when the 1st argument "
-                    "has the type DateTime or DateTime64",
-                        getName(), interval_type->getKind().toString());
+                    "Illegal type {} of 3rd argument of function {}, expected a constant timezone string",
+                    type_arg3->getName(), getName());
+            if (value_is_date && result_type == ResultType::Date) /// weird why this is && instead of || but too afraid to change it
+                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has type DateTime or DateTime64",
+                    getName(), interval_type->getKind().toString());
         };
 
         if (arguments.size() == 2)
         {
             check_first_argument();
-            check_interval_argument();
+            check_second_argument();
         }
         else if (arguments.size() == 3)
         {
             check_first_argument();
-            check_interval_argument();
-            check_timezone_argument();
+            check_second_argument();
+            check_third_argument();
         }
         else
         {
@@ -122,25 +124,27 @@ public:
                 getName(), arguments.size());
         }
 
-        if (result_type_is_date)
-            return std::make_shared<DataTypeDate>();
-        else if (result_type_is_datetime)
-            return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false));
-        else if (result_type_is_datetime_64)
+        switch (result_type)
         {
-            auto scale = 0;
+            case ResultType::Date:
+                return std::make_shared<DataTypeDate>();
+            case ResultType::DateTime:
+                return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false));
+            case ResultType::DateTime64:
+            {
+                UInt32 scale = 0;
+                if (interval_type->getKind() == IntervalKind::Nanosecond)
+                    scale = 9;
+                else if (interval_type->getKind() == IntervalKind::Microsecond)
+                    scale = 6;
+                else if (interval_type->getKind() == IntervalKind::Millisecond)
+                    scale = 3;
 
-            if (interval_type->getKind() == IntervalKind::Nanosecond)
-                scale = 9;
-            else if (interval_type->getKind() == IntervalKind::Microsecond)
-                scale = 6;
-            else if (interval_type->getKind() == IntervalKind::Millisecond)
-                scale = 3;
-
-            return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false));
+                return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false));
+            }
         }
 
-        UNREACHABLE();
+        std::unreachable();
     }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override
@@ -154,110 +158,106 @@ public:
 
 private:
     ColumnPtr dispatchForTimeColumn(
-        const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const
+        const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column,
+        const DataTypePtr & result_type, const DateLUTImpl & time_zone) const
     {
-        const auto & from_datatype = *time_column.type.get();
+        const auto & time_column_type = *time_column.type.get();
+        const auto & time_column_col = *time_column.column.get();
 
-        if (isDateTime64(from_datatype))
+        if (isDateTime64(time_column_type))
         {
-            const auto * time_column_vec = checkAndGetColumn<ColumnDateTime64>(time_column.column.get());
-            auto scale = assert_cast<const DataTypeDateTime64 &>(from_datatype).getScale();
+            const auto * time_column_vec = checkAndGetColumn<ColumnDateTime64>(time_column_col);
+            auto scale = assert_cast<const DataTypeDateTime64 &>(time_column_type).getScale();
 
             if (time_column_vec)
-                return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime64 &>(from_datatype), *time_column_vec, interval_column, result_type, time_zone, scale);
+                return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime64 &>(time_column_type), *time_column_vec, interval_column, result_type, time_zone, scale);
         }
-        if (isDateTime(from_datatype))
+        else if (isDateTime(time_column_type))
         {
-            const auto * time_column_vec = checkAndGetColumn<ColumnDateTime>(time_column.column.get());
+            const auto * time_column_vec = checkAndGetColumn<ColumnDateTime>(time_column_col);
             if (time_column_vec)
-                return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime &>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
+                return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime &>(time_column_type), *time_column_vec, interval_column, result_type, time_zone);
         }
-        if (isDate(from_datatype))
+        else if (isDate(time_column_type))
         {
-            const auto * time_column_vec = checkAndGetColumn<ColumnDate>(time_column.column.get());
+            const auto * time_column_vec = checkAndGetColumn<ColumnDate>(time_column_col);
             if (time_column_vec)
-                return dispatchForIntervalColumn(assert_cast<const DataTypeDate &>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
+                return dispatchForIntervalColumn(assert_cast<const DataTypeDate &>(time_column_type), *time_column_vec, interval_column, result_type, time_zone);
         }
-        if (isDate32(from_datatype))
-        {
-            const auto * time_column_vec = checkAndGetColumn<ColumnDate32>(time_column.column.get());
-            if (time_column_vec)
-                return dispatchForIntervalColumn(assert_cast<const DataTypeDate32 &>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
-        }
-        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. Must contain dates or dates with time", getName());
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, DateTime or DateTime64", getName());
     }
 
-    template <typename TimeColumnType, typename TimeDataType>
+    template <typename TimeDataType, typename TimeColumnType>
     ColumnPtr dispatchForIntervalColumn(
         const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column,
-        const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const
+        const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale = 1) const
     {
         const auto * interval_type = checkAndGetDataType<DataTypeInterval>(interval_column.type.get());
         if (!interval_type)
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be an interval of time.", getName());
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a time interval", getName());
 
         const auto * interval_column_const_int64 = checkAndGetColumnConst<ColumnInt64>(interval_column.column.get());
         if (!interval_column_const_int64)
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be a const interval of time.", getName());
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a const time interval", getName());
 
-        Int64 num_units = interval_column_const_int64->getValue<Int64>();
+        const Int64 num_units = interval_column_const_int64->getValue<Int64>();
         if (num_units <= 0)
-            throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for second argument of function {} must be positive.", getName());
+            throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for 2nd argument of function {} must be positive", getName());
 
         switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case)
         {
             case IntervalKind::Nanosecond:
-                return execute<TimeDataType, DataTypeDateTime64, IntervalKind::Nanosecond>(time_data_type, time_column, num_units, result_type, time_zone, scale);
+                return execute<TimeDataType, TimeColumnType, DataTypeDateTime64, IntervalKind::Nanosecond>(time_data_type, time_column, num_units, result_type, time_zone, scale);
             case IntervalKind::Microsecond:
-                return execute<TimeDataType, DataTypeDateTime64, IntervalKind::Microsecond>(time_data_type, time_column, num_units, result_type, time_zone, scale);
+                return execute<TimeDataType, TimeColumnType, DataTypeDateTime64, IntervalKind::Microsecond>(time_data_type, time_column, num_units, result_type, time_zone, scale);
             case IntervalKind::Millisecond:
-                return execute<TimeDataType, DataTypeDateTime64, IntervalKind::Millisecond>(time_data_type, time_column, num_units, result_type, time_zone, scale);
+                return execute<TimeDataType, TimeColumnType, DataTypeDateTime64, IntervalKind::Millisecond>(time_data_type, time_column, num_units, result_type, time_zone, scale);
             case IntervalKind::Second:
-                return execute<TimeDataType, DataTypeDateTime, IntervalKind::Second>(time_data_type, time_column, num_units, result_type, time_zone, scale);
+                return execute<TimeDataType, TimeColumnType, DataTypeDateTime, IntervalKind::Second>(time_data_type, time_column, num_units, result_type, time_zone, scale);
             case IntervalKind::Minute:
-                return execute<TimeDataType, DataTypeDateTime, IntervalKind::Minute>(time_data_type, time_column, num_units, result_type, time_zone, scale);
+                return execute<TimeDataType, TimeColumnType, DataTypeDateTime, IntervalKind::Minute>(time_data_type, time_column, num_units, result_type, time_zone, scale);
             case IntervalKind::Hour:
-                return execute<TimeDataType, DataTypeDateTime, IntervalKind::Hour>(time_data_type, time_column, num_units, result_type, time_zone, scale);
+                return execute<TimeDataType, TimeColumnType, DataTypeDateTime, IntervalKind::Hour>(time_data_type, time_column, num_units, result_type, time_zone, scale);
             case IntervalKind::Day:
-                return execute<TimeDataType, DataTypeDateTime, IntervalKind::Day>(time_data_type, time_column, num_units, result_type, time_zone, scale);
+                return execute<TimeDataType, TimeColumnType, DataTypeDateTime, IntervalKind::Day>(time_data_type, time_column, num_units, result_type, time_zone, scale);
             case IntervalKind::Week:
-                return execute<TimeDataType, DataTypeDate, IntervalKind::Week>(time_data_type, time_column, num_units, result_type, time_zone, scale);
+                return execute<TimeDataType, TimeColumnType, DataTypeDate, IntervalKind::Week>(time_data_type, time_column, num_units, result_type, time_zone, scale);
             case IntervalKind::Month:
-                return execute<TimeDataType, DataTypeDate, IntervalKind::Month>(time_data_type, time_column, num_units, result_type, time_zone, scale);
+                return execute<TimeDataType, TimeColumnType, DataTypeDate, IntervalKind::Month>(time_data_type, time_column, num_units, result_type, time_zone, scale);
             case IntervalKind::Quarter:
-                return execute<TimeDataType, DataTypeDate, IntervalKind::Quarter>(time_data_type, time_column, num_units, result_type, time_zone, scale);
+                return execute<TimeDataType, TimeColumnType, DataTypeDate, IntervalKind::Quarter>(time_data_type, time_column, num_units, result_type, time_zone, scale);
             case IntervalKind::Year:
-                return execute<TimeDataType, DataTypeDate, IntervalKind::Year>(time_data_type, time_column, num_units, result_type, time_zone, scale);
+                return execute<TimeDataType, TimeColumnType, DataTypeDate, IntervalKind::Year>(time_data_type, time_column, num_units, result_type, time_zone, scale);
         }
 
-        UNREACHABLE();
+        std::unreachable();
     }
 
-    template <typename TimeDataType, typename ToDataType, IntervalKind::Kind unit, typename ColumnType>
-    ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const
+    template <typename TimeDataType, typename TimeColumnType, typename ResultDataType, IntervalKind::Kind unit>
+    ColumnPtr execute(
+        const TimeDataType &, const TimeColumnType & time_column_type, Int64 num_units,
+        const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale) const
     {
-        using ToColumnType = typename ToDataType::ColumnType;
-        using ToFieldType = typename ToDataType::FieldType;
+        using ResultColumnType = typename ResultDataType::ColumnType;
+        using ResultFieldType = typename ResultDataType::FieldType;
 
         const auto & time_data = time_column_type.getData();
         size_t size = time_data.size();
 
         auto result_col = result_type->createColumn();
-        auto *col_to = assert_cast<ToColumnType *>(result_col.get());
+        auto * col_to = assert_cast<ResultColumnType *>(result_col.get());
         auto & result_data = col_to->getData();
         result_data.resize(size);
 
         Int64 scale_multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
 
         for (size_t i = 0; i != size; ++i)
-            result_data[i] = static_cast<ToFieldType>(ToStartOfInterval<unit>::execute(time_data[i], num_units, time_zone, scale_multiplier));
+            result_data[i] = static_cast<ResultFieldType>(ToStartOfInterval<unit>::execute(time_data[i], num_units, time_zone, scale_multiplier));
 
         return result_col;
     }
 };
 
-}
-
 REGISTER_FUNCTION(ToStartOfInterval)
 {
     factory.registerFunction<FunctionToStartOfInterval>();

From 6b0936553c9d8c34ce32fef5b4f64669ae25946e Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Mon, 11 Dec 2023 22:58:39 +0100
Subject: [PATCH 151/213] Don't run test 02919_skip_lots_of_parsing_errors on
 aarch64

---
 tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.sh b/tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.sh
index 2c54e9e68da..7ddb55fb39b 100755
--- a/tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.sh
+++ b/tests/queries/0_stateless/02919_skip_lots_of_parsing_errors.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: no-fasttest
+# Tags: no-fasttest, no-cpu-aarch64
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh

From 16afd81322a15e4cdde07ea0007d45bbdbccd1b9 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Tue, 12 Dec 2023 00:19:16 +0100
Subject: [PATCH 152/213] Fix retries for disconnected nodes for BACKUP/RESTORE
 ON CLUSTER.

---
 src/Backups/BackupCoordinationRemote.cpp    |   6 +-
 src/Backups/BackupCoordinationStageSync.cpp | 107 ++++++++------------
 src/Backups/BackupCoordinationStageSync.h   |   2 +-
 3 files changed, 43 insertions(+), 72 deletions(-)

diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp
index 2633e1bedd2..7319b1aba58 100644
--- a/src/Backups/BackupCoordinationRemote.cpp
+++ b/src/Backups/BackupCoordinationRemote.cpp
@@ -184,11 +184,9 @@ BackupCoordinationRemote::BackupCoordinationRemote(
             if (my_is_internal)
             {
                 String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host;
+                zk->createAncestors(alive_node_path);
                 auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
-
-                if (code == Coordination::Error::ZNODEEXISTS)
-                    zk->handleEphemeralNodeExistenceNoFailureInjection(alive_node_path, "");
-                else if (code != Coordination::Error::ZOK)
+                if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
                     throw zkutil::KeeperException::fromPath(code, alive_node_path);
             }
         })
diff --git a/src/Backups/BackupCoordinationStageSync.cpp b/src/Backups/BackupCoordinationStageSync.cpp
index 9b9ddc8515c..e4dac7dbbe9 100644
--- a/src/Backups/BackupCoordinationStageSync.cpp
+++ b/src/Backups/BackupCoordinationStageSync.cpp
@@ -60,12 +60,6 @@ void BackupCoordinationStageSync::set(const String & current_host, const String
         }
         else
         {
-            /// Make an ephemeral node so the initiator can track if the current host is still working.
-            String alive_node_path = zookeeper_path + "/alive|" + current_host;
-            auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
-            if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS)
-                throw zkutil::KeeperException::fromPath(code, alive_node_path);
-
             zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
             zookeeper->createIfNotExists(zookeeper_path + "/current|" + current_host + "|" + new_stage, message);
         }
@@ -118,27 +112,24 @@ struct BackupCoordinationStageSync::State
     Strings results;
     std::map<String, UnreadyHostState> unready_hosts;
     std::optional<std::pair<String, Exception>> error;
-    std::optional<String> host_terminated;
+    std::optional<String> disconnected_host;
 };
 
 BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState(
-    const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const
+    WithRetries::RetriesControlHolder & retries_control_holder,
+    const Strings & zk_nodes,
+    const Strings & all_hosts,
+    const String & stage_to_wait) const
 {
+    auto zookeeper = retries_control_holder.faulty_zookeeper;
+    auto & retries_ctl = retries_control_holder.retries_ctl;
+
     std::unordered_set<std::string_view> zk_nodes_set{zk_nodes.begin(), zk_nodes.end()};
 
     State state;
     if (zk_nodes_set.contains("error"))
     {
-        String errors;
-        {
-            auto holder = with_retries.createRetriesControlHolder("readCurrentState");
-            holder.retries_ctl.retryLoop(
-                [&, &zookeeper = holder.faulty_zookeeper]()
-                {
-                    with_retries.renewZooKeeper(zookeeper);
-                    errors = zookeeper->get(zookeeper_path + "/error");
-                });
-        }
+        String errors = zookeeper->get(zookeeper_path + "/error");
         ReadBufferFromOwnString buf{errors};
         String host;
         readStringBinary(host, buf);
@@ -150,60 +141,40 @@ BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState
     {
         if (!zk_nodes_set.contains("current|" + host + "|" + stage_to_wait))
         {
-            UnreadyHostState unready_host_state;
             const String started_node_name = "started|" + host;
             const String alive_node_name = "alive|" + host;
             const String alive_node_path = zookeeper_path + "/" + alive_node_name;
+
+            UnreadyHostState unready_host_state;
             unready_host_state.started = zk_nodes_set.contains(started_node_name);
-
-            /// Because we do retries everywhere we can't fully rely on ephemeral nodes anymore.
-            /// Though we recreate "alive" node when reconnecting it might be not enough and race condition is possible.
-            /// And everything we can do here - just retry.
-            /// In worst case when we won't manage to see the alive node for a long time we will just abort the backup.
             unready_host_state.alive = zk_nodes_set.contains(alive_node_name);
-            if (!unready_host_state.alive)
-            {
-                LOG_TRACE(log, "Seems like host ({}) is dead. Will retry the check to confirm", host);
-                auto holder = with_retries.createRetriesControlHolder("readCurrentState::checkAliveNode");
-                holder.retries_ctl.retryLoop(
-                    [&, &zookeeper = holder.faulty_zookeeper]()
-                {
-                    with_retries.renewZooKeeper(zookeeper);
-
-                    if (zookeeper->existsNoFailureInjection(alive_node_path))
-                    {
-                        unready_host_state.alive = true;
-                        return;
-                    }
-
-                    // Retry with backoff. We also check whether it is last retry or no, because we won't to rethrow an exception.
-                    if (!holder.retries_ctl.isLastRetry())
-                        holder.retries_ctl.setKeeperError(Coordination::Error::ZNONODE, "There is no alive node for host {}. Will retry", host);
-                });
-            }
-            LOG_TRACE(log, "Host ({}) appeared to be {}", host, unready_host_state.alive ? "alive" : "dead");
-
             state.unready_hosts.emplace(host, unready_host_state);
-            if (!unready_host_state.alive && unready_host_state.started && !state.host_terminated)
-                state.host_terminated = host;
+
+            if (!unready_host_state.alive && !state.disconnected_host)
+            {
+                /// If the "alive" node doesn't exist then we don't have a connection to the corresponding host.
+                /// This node is ephemeral so it will probably be recreated soon. We use zookeeper retries to wait.
+                /// In the worst case, when we don't manage to see the alive node for a long time, we will just abort the backup.
+                state.disconnected_host = host;
+                String message;
+                if (unready_host_state.started)
+                    message = fmt::format("Lost connection to host {}", host);
+                else
+                    message = fmt::format("No connection to host {} yet", host);
+                if (!retries_ctl.isLastRetry())
+                    message += ", will retry";
+                retries_ctl.setUserError(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, message);
+            }
         }
     }
 
-    if (state.host_terminated || !state.unready_hosts.empty())
+    if (state.disconnected_host || !state.unready_hosts.empty())
         return state;
 
-    auto holder = with_retries.createRetriesControlHolder("waitImpl::collectStagesToWait");
-    holder.retries_ctl.retryLoop(
-        [&, &zookeeper = holder.faulty_zookeeper]()
-    {
-        with_retries.renewZooKeeper(zookeeper);
-        Strings results;
-
-        for (const auto & host : all_hosts)
-            results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait));
-
-        state.results = std::move(results);
-    });
+    Strings results;
+    for (const auto & host : all_hosts)
+        results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait));
+    state.results = std::move(results);
 
     return state;
 }
@@ -229,7 +200,7 @@ Strings BackupCoordinationStageSync::waitImpl(
         auto watch = std::make_shared<Poco::Event>();
         Strings zk_nodes;
         {
-            auto holder = with_retries.createRetriesControlHolder("waitImpl::getChildren");
+            auto holder = with_retries.createRetriesControlHolder("waitImpl");
             holder.retries_ctl.retryLoop(
                 [&, &zookeeper = holder.faulty_zookeeper]()
             {
@@ -237,12 +208,14 @@ Strings BackupCoordinationStageSync::waitImpl(
                 watch->reset();
                 /// Get zk nodes and subscribe on their changes.
                 zk_nodes = zookeeper->getChildren(zookeeper_path, nullptr, watch);
+
+                /// Read the current state of zk nodes.
+                state = readCurrentState(holder, zk_nodes, all_hosts, stage_to_wait);
             });
         }
 
-        /// Read and analyze the current state of zk nodes.
-        state = readCurrentState(zk_nodes, all_hosts, stage_to_wait);
-        if (state.error || state.host_terminated || state.unready_hosts.empty())
+        /// Analyze the current state of zk nodes.
+        if (state.error || state.disconnected_host || state.unready_hosts.empty())
             break; /// Error happened or everything is ready.
 
         /// Log that we will wait
@@ -270,8 +243,8 @@ Strings BackupCoordinationStageSync::waitImpl(
         state.error->second.rethrow();
 
     /// Another host terminated without errors.
-    if (state.host_terminated)
-        throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Host {} suddenly stopped working", *state.host_terminated);
+    if (state.disconnected_host)
+        throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "No connection to host {}", *state.disconnected_host);
 
     /// Something's unready, timeout is probably not enough.
     if (!state.unready_hosts.empty())
diff --git a/src/Backups/BackupCoordinationStageSync.h b/src/Backups/BackupCoordinationStageSync.h
index 2efaec46b3a..e34fbcc099b 100644
--- a/src/Backups/BackupCoordinationStageSync.h
+++ b/src/Backups/BackupCoordinationStageSync.h
@@ -29,7 +29,7 @@ private:
     void createRootNodes();
 
     struct State;
-    State readCurrentState(const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
+    State readCurrentState(WithRetries::RetriesControlHolder & retries_control_holder, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
 
     Strings waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const;
 

From cab764fff9f6522be9684a5dbbce49c44c18e4f4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 03:44:26 +0100
Subject: [PATCH 153/213] Add a test

---
 ...2_window_functions_logical_error.reference | 216 ++++++++++++++++++
 .../02942_window_functions_logical_error.sql  | 158 +++++++++++++
 2 files changed, 374 insertions(+)
 create mode 100644 tests/queries/0_stateless/02942_window_functions_logical_error.reference
 create mode 100644 tests/queries/0_stateless/02942_window_functions_logical_error.sql

diff --git a/tests/queries/0_stateless/02942_window_functions_logical_error.reference b/tests/queries/0_stateless/02942_window_functions_logical_error.reference
new file mode 100644
index 00000000000..73f8351d9df
--- /dev/null
+++ b/tests/queries/0_stateless/02942_window_functions_logical_error.reference
@@ -0,0 +1,216 @@
+1	901	19
+1	911	19
+1	921	19
+1	931	19
+1	941	19
+1	951	20
+1	961	20
+1	971	20
+1	981	20
+1	991	20
+2	902	19
+2	912	19
+2	922	19
+2	932	19
+2	942	19
+2	952	20
+2	962	20
+2	972	20
+2	982	20
+2	992	20
+3	903	19
+3	913	19
+3	923	19
+3	933	19
+3	943	19
+3	953	20
+3	963	20
+3	973	20
+3	983	20
+3	993	20
+4	904	19
+4	914	19
+4	924	19
+4	934	19
+4	944	19
+4	954	20
+4	964	20
+4	974	20
+4	984	20
+4	994	20
+5	905	19
+5	915	19
+5	925	19
+5	935	19
+5	945	19
+5	955	20
+5	965	20
+5	975	20
+5	985	20
+5	995	20
+6	906	19
+6	916	19
+6	926	19
+6	936	19
+6	946	19
+6	956	20
+6	966	20
+6	976	20
+6	986	20
+6	996	20
+7	907	19
+7	917	19
+7	927	19
+7	937	19
+7	947	19
+7	957	20
+7	967	20
+7	977	20
+7	987	20
+7	997	20
+8	908	19
+8	918	19
+8	928	19
+8	938	19
+8	948	19
+8	958	20
+8	968	20
+8	978	20
+8	988	20
+8	998	20
+9	909	19
+9	919	19
+9	929	19
+9	939	19
+9	949	19
+9	959	20
+9	969	20
+9	979	20
+9	989	20
+9	999	20
+1	1301	19
+1	1311	19
+1	1321	19
+1	1331	19
+1	1341	19
+1	1351	19
+1	1361	19
+1	1371	20
+1	1381	20
+1	1391	20
+1	1401	20
+1	1411	20
+1	1421	20
+1	1431	20
+2	1302	19
+2	1312	19
+2	1322	19
+2	1332	19
+2	1342	19
+2	1352	19
+2	1362	19
+2	1372	20
+2	1382	20
+2	1392	20
+2	1402	20
+2	1412	20
+2	1422	20
+2	1432	20
+3	1303	19
+3	1313	19
+3	1323	19
+3	1333	19
+3	1343	19
+3	1353	19
+3	1363	19
+3	1373	20
+3	1383	20
+3	1393	20
+3	1403	20
+3	1413	20
+3	1423	20
+3	1433	20
+4	1304	19
+4	1314	19
+4	1324	19
+4	1334	19
+4	1344	19
+4	1354	19
+4	1364	19
+4	1374	20
+4	1384	20
+4	1394	20
+4	1404	20
+4	1414	20
+4	1424	20
+4	1434	20
+5	1305	19
+5	1315	19
+5	1325	19
+5	1335	19
+5	1345	19
+5	1355	19
+5	1365	19
+5	1375	20
+5	1385	20
+5	1395	20
+5	1405	20
+5	1415	20
+5	1425	20
+5	1435	20
+6	1306	19
+6	1316	19
+6	1326	19
+6	1336	19
+6	1346	19
+6	1356	19
+6	1366	19
+6	1376	20
+6	1386	20
+6	1396	20
+6	1406	20
+6	1416	20
+6	1426	20
+6	1436	20
+7	1307	19
+7	1317	19
+7	1327	19
+7	1337	19
+7	1347	19
+7	1357	19
+7	1367	19
+7	1377	20
+7	1387	20
+7	1397	20
+7	1407	20
+7	1417	20
+7	1427	20
+7	1437	20
+8	1308	19
+8	1318	19
+8	1328	19
+8	1338	19
+8	1348	19
+8	1358	19
+8	1368	19
+8	1378	20
+8	1388	20
+8	1398	20
+8	1408	20
+8	1418	20
+8	1428	20
+8	1438	20
+9	1309	19
+9	1319	19
+9	1329	19
+9	1339	19
+9	1349	19
+9	1359	19
+9	1369	19
+9	1379	20
+9	1389	20
+9	1399	20
+9	1409	20
+9	1419	20
+9	1429	20
+9	1439	20
diff --git a/tests/queries/0_stateless/02942_window_functions_logical_error.sql b/tests/queries/0_stateless/02942_window_functions_logical_error.sql
new file mode 100644
index 00000000000..1e4371a134f
--- /dev/null
+++ b/tests/queries/0_stateless/02942_window_functions_logical_error.sql
@@ -0,0 +1,158 @@
+DROP TABLE IF EXISTS posts;
+DROP TABLE IF EXISTS post_metrics;
+
+CREATE TABLE IF NOT EXISTS posts
+(
+    `page_id` LowCardinality(String),
+    `post_id` String CODEC(LZ4),
+    `host_id` UInt32 CODEC(T64, LZ4),
+    `path_id` UInt32,
+    `created` DateTime CODEC(T64, LZ4),
+    `as_of` DateTime CODEC(T64, LZ4)
+)
+ENGINE = ReplacingMergeTree(as_of)
+PARTITION BY toStartOfMonth(created)
+ORDER BY (page_id, post_id)
+TTL created + toIntervalMonth(26);
+
+
+INSERT INTO posts SELECT
+    repeat('a', (number % 10) + 1),
+    toString(number),
+    number % 10,
+    number,
+    now() - toIntervalMinute(number),
+    now()
+FROM numbers(1000);
+
+
+CREATE TABLE IF NOT EXISTS post_metrics
+(
+    `page_id` LowCardinality(String),
+    `post_id` String CODEC(LZ4),
+    `created` DateTime CODEC(T64, LZ4),
+    `impressions` UInt32 CODEC(T64, LZ4),
+    `clicks` UInt32 CODEC(T64, LZ4),
+    `as_of` DateTime CODEC(T64, LZ4)
+)
+ENGINE = ReplacingMergeTree(as_of)
+PARTITION BY toStartOfMonth(created)
+ORDER BY (page_id, post_id)
+TTL created + toIntervalMonth(26);
+
+
+INSERT INTO post_metrics SELECT
+    repeat('a', (number % 10) + 1),
+    toString(number),
+    now() - toIntervalMinute(number),
+    number * 100,
+    number * 10,
+    now()
+FROM numbers(1000);
+
+
+SELECT
+    host_id,
+    path_id,
+    max(rank) AS rank
+FROM
+(
+    WITH
+        as_of_posts AS
+        (
+            SELECT
+                *,
+                row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num
+            FROM posts
+            WHERE (created >= subtractHours(now(), 24)) AND (host_id > 0)
+        ),
+        as_of_post_metrics AS
+        (
+            SELECT
+                *,
+                row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num
+            FROM post_metrics
+            WHERE created >= subtractHours(now(), 24)
+        )
+    SELECT
+        page_id,
+        post_id,
+        host_id,
+        path_id,
+        impressions,
+        clicks,
+        ntile(20) OVER (PARTITION BY page_id ORDER BY clicks ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS rank
+    FROM as_of_posts
+    GLOBAL LEFT JOIN as_of_post_metrics USING (page_id, post_id, row_num)
+    WHERE (row_num = 1) AND (impressions > 0)
+) AS t
+WHERE t.rank > 18
+GROUP BY
+    host_id,
+    path_id
+ORDER BY host_id, path_id;
+
+
+INSERT INTO posts SELECT
+    repeat('a', (number % 10) + 1),
+    toString(number),
+    number % 10,
+    number,
+    now() - toIntervalMinute(number),
+    now()
+FROM numbers(100000);
+
+
+INSERT INTO post_metrics SELECT
+    repeat('a', (number % 10) + 1),
+    toString(number),
+    now() - toIntervalMinute(number),
+    number * 100,
+    number * 10,
+    now()
+FROM numbers(100000);
+
+
+SELECT
+    host_id,
+    path_id,
+    max(rank) AS rank
+FROM
+(
+    WITH
+        as_of_posts AS
+        (
+            SELECT
+                *,
+                row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num
+            FROM posts
+            WHERE (created >= subtractHours(now(), 24)) AND (host_id > 0)
+        ),
+        as_of_post_metrics AS
+        (
+            SELECT
+                *,
+                row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num
+            FROM post_metrics
+            WHERE created >= subtractHours(now(), 24)
+        )
+    SELECT
+        page_id,
+        post_id,
+        host_id,
+        path_id,
+        impressions,
+        clicks,
+        ntile(20) OVER (PARTITION BY page_id ORDER BY clicks ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS rank
+    FROM as_of_posts
+    GLOBAL LEFT JOIN as_of_post_metrics USING (page_id, post_id, row_num)
+    WHERE (row_num = 1) AND (impressions > 0)
+) AS t
+WHERE t.rank > 18
+GROUP BY
+    host_id,
+    path_id
+ORDER BY host_id, path_id;
+
+DROP TABLE posts;
+DROP TABLE post_metrics;

From 22200566c8fbadee0f6c820a9e5320bde9549165 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 03:45:00 +0100
Subject: [PATCH 154/213] Revert "Merge pull request #39631 from
 ClickHouse/parallel-window"

This reverts commit 33cc853d6193a7ad0e79aafb3c22b5effeeb9b17, reversing
changes made to b05a3d02ed5bbd8304bc48763a7c58b6f419eb6d.
---
 src/Interpreters/InterpreterSelectQuery.cpp   |   1 -
 src/Planner/Planner.cpp                       |   1 -
 src/Processors/QueryPlan/SortingStep.cpp      |  78 +----------
 src/Processors/QueryPlan/SortingStep.h        |  30 +---
 src/Processors/QueryPlan/WindowStep.cpp       |   3 +-
 .../ScatterByPartitionTransform.cpp           | 129 ------------------
 .../Transforms/ScatterByPartitionTransform.h  |  34 -----
 ...568_window_functions_distributed.reference |  29 ----
 .../01568_window_functions_distributed.sql    |   4 -
 .../02884_parallel_window_functions.reference | 100 --------------
 .../02884_parallel_window_functions.sql       | 119 ----------------
 11 files changed, 7 insertions(+), 521 deletions(-)
 delete mode 100644 src/Processors/Transforms/ScatterByPartitionTransform.cpp
 delete mode 100644 src/Processors/Transforms/ScatterByPartitionTransform.h
 delete mode 100644 tests/queries/0_stateless/02884_parallel_window_functions.reference
 delete mode 100644 tests/queries/0_stateless/02884_parallel_window_functions.sql

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 4f4e96a9be7..67245438156 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -2942,7 +2942,6 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
             auto sorting_step = std::make_unique<SortingStep>(
                 query_plan.getCurrentDataStream(),
                 window.full_sort_description,
-                window.partition_by,
                 0 /* LIMIT */,
                 sort_settings,
                 settings.optimize_sorting_by_input_stream_properties);
diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp
index d6e0f42a06d..12e8d795347 100644
--- a/src/Planner/Planner.cpp
+++ b/src/Planner/Planner.cpp
@@ -915,7 +915,6 @@ void addWindowSteps(QueryPlan & query_plan,
             auto sorting_step = std::make_unique<SortingStep>(
                 query_plan.getCurrentDataStream(),
                 window_description.full_sort_description,
-                window_description.partition_by,
                 0 /*limit*/,
                 sort_settings,
                 settings.optimize_sorting_by_input_stream_properties);
diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp
index 641b9036d4c..55ce763575e 100644
--- a/src/Processors/QueryPlan/SortingStep.cpp
+++ b/src/Processors/QueryPlan/SortingStep.cpp
@@ -1,4 +1,3 @@
-#include <memory>
 #include <stdexcept>
 #include <IO/Operators.h>
 #include <Processors/Merges/MergingSortedTransform.h>
@@ -10,8 +9,6 @@
 #include <QueryPipeline/QueryPipelineBuilder.h>
 #include <Common/JSONBuilder.h>
 
-#include <Processors/ResizeProcessor.h>
-#include <Processors/Transforms/ScatterByPartitionTransform.h>
 
 namespace CurrentMetrics
 {
@@ -79,21 +76,6 @@ SortingStep::SortingStep(
     output_stream->sort_scope = DataStream::SortScope::Global;
 }
 
-SortingStep::SortingStep(
-        const DataStream & input_stream,
-        const SortDescription & description_,
-        const SortDescription & partition_by_description_,
-        UInt64 limit_,
-        const Settings & settings_,
-        bool optimize_sorting_by_input_stream_properties_)
-    : SortingStep(input_stream, description_, limit_, settings_, optimize_sorting_by_input_stream_properties_)
-{
-    partition_by_description = partition_by_description_;
-
-    output_stream->sort_description = result_description;
-    output_stream->sort_scope = DataStream::SortScope::Stream;
-}
-
 SortingStep::SortingStep(
     const DataStream & input_stream_,
     SortDescription prefix_description_,
@@ -135,11 +117,7 @@ void SortingStep::updateOutputStream()
 {
     output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
     output_stream->sort_description = result_description;
-
-    if (partition_by_description.empty())
-        output_stream->sort_scope = DataStream::SortScope::Global;
-    else
-        output_stream->sort_scope = DataStream::SortScope::Stream;
+    output_stream->sort_scope = DataStream::SortScope::Global;
 }
 
 void SortingStep::updateLimit(size_t limit_)
@@ -157,55 +135,6 @@ void SortingStep::convertToFinishSorting(SortDescription prefix_description_)
     prefix_description = std::move(prefix_description_);
 }
 
-void SortingStep::scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline)
-{
-    size_t threads = pipeline.getNumThreads();
-    size_t streams = pipeline.getNumStreams();
-
-    if (!partition_by_description.empty() && threads > 1)
-    {
-        Block stream_header = pipeline.getHeader();
-
-        ColumnNumbers key_columns;
-        key_columns.reserve(partition_by_description.size());
-        for (auto & col : partition_by_description)
-        {
-            key_columns.push_back(stream_header.getPositionByName(col.column_name));
-        }
-
-        pipeline.transform([&](OutputPortRawPtrs ports)
-        {
-            Processors processors;
-            for (auto * port : ports)
-            {
-                auto scatter = std::make_shared<ScatterByPartitionTransform>(stream_header, threads, key_columns);
-                connect(*port, scatter->getInputs().front());
-                processors.push_back(scatter);
-            }
-            return processors;
-        });
-
-        if (streams > 1)
-        {
-            pipeline.transform([&](OutputPortRawPtrs ports)
-            {
-                Processors processors;
-                for (size_t i = 0; i < threads; ++i)
-                {
-                    size_t output_it = i;
-                    auto resize = std::make_shared<ResizeProcessor>(stream_header, streams, 1);
-                    auto & inputs = resize->getInputs();
-
-                    for (auto input_it = inputs.begin(); input_it != inputs.end(); output_it += threads, ++input_it)
-                        connect(*ports[output_it], *input_it);
-                    processors.push_back(resize);
-                }
-                return processors;
-            });
-        }
-    }
-}
-
 void SortingStep::finishSorting(
     QueryPipelineBuilder & pipeline, const SortDescription & input_sort_desc, const SortDescription & result_sort_desc, const UInt64 limit_)
 {
@@ -331,12 +260,10 @@ void SortingStep::fullSortStreams(
 void SortingStep::fullSort(
     QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, const UInt64 limit_, const bool skip_partial_sort)
 {
-    scatterByPartitionIfNeeded(pipeline);
-
     fullSortStreams(pipeline, sort_settings, result_sort_desc, limit_, skip_partial_sort);
 
     /// If there are several streams, then we merge them into one
-    if (pipeline.getNumStreams() > 1 && (partition_by_description.empty() || pipeline.getNumThreads() == 1))
+    if (pipeline.getNumStreams() > 1)
     {
         auto transform = std::make_shared<MergingSortedTransform>(
             pipeline.getHeader(),
@@ -368,7 +295,6 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build
     {
         bool need_finish_sorting = (prefix_description.size() < result_description.size());
         mergingSorted(pipeline, prefix_description, (need_finish_sorting ? 0 : limit));
-
         if (need_finish_sorting)
         {
             finishSorting(pipeline, prefix_description, result_description, limit);
diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h
index 52f48f66a32..371a24ac6f2 100644
--- a/src/Processors/QueryPlan/SortingStep.h
+++ b/src/Processors/QueryPlan/SortingStep.h
@@ -40,15 +40,6 @@ public:
         const Settings & settings_,
         bool optimize_sorting_by_input_stream_properties_);
 
-    /// Full with partitioning
-    SortingStep(
-        const DataStream & input_stream,
-        const SortDescription & description_,
-        const SortDescription & partition_by_description_,
-        UInt64 limit_,
-        const Settings & settings_,
-        bool optimize_sorting_by_input_stream_properties_);
-
     /// FinishSorting
     SortingStep(
         const DataStream & input_stream_,
@@ -92,24 +83,14 @@ public:
         bool skip_partial_sort = false);
 
 private:
-    void scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline);
     void updateOutputStream() override;
 
-    static void mergeSorting(
-        QueryPipelineBuilder & pipeline,
-        const Settings & sort_settings,
-        const SortDescription & result_sort_desc,
-        UInt64 limit_);
+    static void
+    mergeSorting(QueryPipelineBuilder & pipeline, const Settings & sort_settings, const SortDescription & result_sort_desc, UInt64 limit_);
 
-    void mergingSorted(
-        QueryPipelineBuilder & pipeline,
-        const SortDescription & result_sort_desc,
-        UInt64 limit_);
+    void mergingSorted(QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, UInt64 limit_);
     void finishSorting(
-        QueryPipelineBuilder & pipeline,
-        const SortDescription & input_sort_desc,
-        const SortDescription & result_sort_desc,
-        UInt64 limit_);
+        QueryPipelineBuilder & pipeline, const SortDescription & input_sort_desc, const SortDescription & result_sort_desc, UInt64 limit_);
     void fullSort(
         QueryPipelineBuilder & pipeline,
         const SortDescription & result_sort_desc,
@@ -120,9 +101,6 @@ private:
 
     SortDescription prefix_description;
     const SortDescription result_description;
-
-    SortDescription partition_by_description;
-
     UInt64 limit;
     bool always_read_till_end = false;
 
diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp
index bb4f429d626..9c68a4b73d1 100644
--- a/src/Processors/QueryPlan/WindowStep.cpp
+++ b/src/Processors/QueryPlan/WindowStep.cpp
@@ -67,8 +67,7 @@ void WindowStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ
     // This resize is needed for cases such as `over ()` when we don't have a
     // sort node, and the input might have multiple streams. The sort node would
     // have resized it.
-    if (window_description.full_sort_description.empty())
-        pipeline.resize(1);
+    pipeline.resize(1);
 
     pipeline.addSimpleTransform(
         [&](const Block & /*header*/)
diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.cpp b/src/Processors/Transforms/ScatterByPartitionTransform.cpp
deleted file mode 100644
index 6e3cdc0fda1..00000000000
--- a/src/Processors/Transforms/ScatterByPartitionTransform.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-#include <Processors/Transforms/ScatterByPartitionTransform.h>
-
-#include <Common/PODArray.h>
-#include <Core/ColumnNumbers.h>
-
-namespace DB
-{
-ScatterByPartitionTransform::ScatterByPartitionTransform(Block header, size_t output_size_, ColumnNumbers key_columns_)
-    : IProcessor(InputPorts{header}, OutputPorts{output_size_, header})
-    , output_size(output_size_)
-    , key_columns(std::move(key_columns_))
-    , hash(0)
-{}
-
-IProcessor::Status ScatterByPartitionTransform::prepare()
-{
-    auto & input = getInputs().front();
-
-    /// Check all outputs are finished or ready to get data.
-
-    bool all_finished = true;
-    for (auto & output : outputs)
-    {
-        if (output.isFinished())
-            continue;
-
-        all_finished = false;
-    }
-
-    if (all_finished)
-    {
-        input.close();
-        return Status::Finished;
-    }
-
-    if (!all_outputs_processed)
-    {
-        auto output_it = outputs.begin();
-        bool can_push = false;
-        for (size_t i = 0; i < output_size; ++i, ++output_it)
-            if (!was_output_processed[i] && output_it->canPush())
-                can_push = true;
-        if (!can_push)
-            return Status::PortFull;
-        return Status::Ready;
-    }
-    /// Try get chunk from input.
-
-    if (input.isFinished())
-    {
-        for (auto & output : outputs)
-            output.finish();
-
-        return Status::Finished;
-    }
-
-    input.setNeeded();
-    if (!input.hasData())
-        return Status::NeedData;
-
-    chunk = input.pull();
-    has_data = true;
-    was_output_processed.assign(outputs.size(), false);
-
-    return Status::Ready;
-}
-
-void ScatterByPartitionTransform::work()
-{
-    if (all_outputs_processed)
-        generateOutputChunks();
-    all_outputs_processed = true;
-
-    size_t chunk_number = 0;
-    for (auto & output : outputs)
-    {
-        auto & was_processed = was_output_processed[chunk_number];
-        auto & output_chunk = output_chunks[chunk_number];
-        ++chunk_number;
-
-        if (was_processed)
-            continue;
-
-        if (output.isFinished())
-            continue;
-
-        if (!output.canPush())
-        {
-            all_outputs_processed = false;
-            continue;
-        }
-
-        output.push(std::move(output_chunk));
-        was_processed = true;
-    }
-
-    if (all_outputs_processed)
-    {
-        has_data = false;
-        output_chunks.clear();
-    }
-}
-
-void ScatterByPartitionTransform::generateOutputChunks()
-{
-    auto num_rows = chunk.getNumRows();
-    const auto & columns = chunk.getColumns();
-
-    hash.reset(num_rows);
-
-    for (const auto & column_number : key_columns)
-        columns[column_number]->updateWeakHash32(hash);
-
-    const auto & hash_data = hash.getData();
-    IColumn::Selector selector(num_rows);
-
-    for (size_t row = 0; row < num_rows; ++row)
-        selector[row] = hash_data[row] % output_size;
-
-    output_chunks.resize(output_size);
-    for (const auto & column : columns)
-    {
-        auto filtered_columns = column->scatter(output_size, selector);
-        for (size_t i = 0; i < output_size; ++i)
-            output_chunks[i].addColumn(std::move(filtered_columns[i]));
-    }
-}
-
-}
diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.h b/src/Processors/Transforms/ScatterByPartitionTransform.h
deleted file mode 100644
index 327f6dd62b4..00000000000
--- a/src/Processors/Transforms/ScatterByPartitionTransform.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#pragma once
-#include <Common/WeakHash.h>
-#include <Core/ColumnNumbers.h>
-#include <Processors/IProcessor.h>
-
-namespace DB
-{
-
-struct ScatterByPartitionTransform : IProcessor
-{
-    ScatterByPartitionTransform(Block header, size_t output_size_, ColumnNumbers key_columns_);
-
-    String getName() const override { return "ScatterByPartitionTransform"; }
-
-    Status prepare() override;
-    void work() override;
-
-private:
-
-    void generateOutputChunks();
-
-    size_t output_size;
-    ColumnNumbers key_columns;
-
-    bool has_data = false;
-    bool all_outputs_processed = true;
-    std::vector<char> was_output_processed;
-    Chunk chunk;
-
-    WeakHash32 hash;
-    Chunks output_chunks;
-};
-
-}
diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.reference b/tests/queries/0_stateless/01568_window_functions_distributed.reference
index 29ff2e7133c..13ac0769a24 100644
--- a/tests/queries/0_stateless/01568_window_functions_distributed.reference
+++ b/tests/queries/0_stateless/01568_window_functions_distributed.reference
@@ -22,16 +22,6 @@ select sum(number) over w as x, max(number) over w as y from t_01568 window w as
 21	8
 21	8
 21	8
-select sum(number) over w, max(number) over w from t_01568 window w as (partition by p) order by p;
-3	2
-3	2
-3	2
-12	5
-12	5
-12	5
-21	8
-21	8
-21	8
 select sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y;
 6	2
 6	2
@@ -51,25 +41,6 @@ select sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,
 42	8
 42	8
 42	8
-select sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y SETTINGS max_threads = 1;
-6	2
-6	2
-6	2
-6	2
-6	2
-6	2
-24	5
-24	5
-24	5
-24	5
-24	5
-24	5
-42	8
-42	8
-42	8
-42	8
-42	8
-42	8
 select distinct sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y;
 6	2
 24	5
diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.sql b/tests/queries/0_stateless/01568_window_functions_distributed.sql
index ecce7b412ba..95072d6460f 100644
--- a/tests/queries/0_stateless/01568_window_functions_distributed.sql
+++ b/tests/queries/0_stateless/01568_window_functions_distributed.sql
@@ -15,12 +15,8 @@ from numbers(9);
 
 select sum(number) over w as x, max(number) over w as y from t_01568 window w as (partition by p) order by x, y;
 
-select sum(number) over w, max(number) over w from t_01568 window w as (partition by p) order by p;
-
 select sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y;
 
-select sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y SETTINGS max_threads = 1;
-
 select distinct sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y;
 
 -- window functions + aggregation w/shards
diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference
deleted file mode 100644
index bac15838dc2..00000000000
--- a/tests/queries/0_stateless/02884_parallel_window_functions.reference
+++ /dev/null
@@ -1,100 +0,0 @@
-1
--- { echoOn }
-
-SELECT
-    nw,
-    sum(WR) AS R,
-    sumIf(WR, uniq_rows = 1) AS UNR
-FROM
-(
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    GROUP BY ac, nw
-)
-GROUP BY nw
-ORDER BY nw ASC, R DESC
-LIMIT 10;
-0	2	0
-1	2	0
-2	2	0
-SELECT
-    nw,
-    sum(WR) AS R,
-    sumIf(WR, uniq_rows = 1) AS UNR
-FROM
-(
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    GROUP BY ac, nw
-)
-GROUP BY nw
-ORDER BY nw ASC, R DESC
-LIMIT 10
-SETTINGS max_threads = 1;
-0	2	0
-1	2	0
-2	2	0
-SELECT
-    nw,
-    sum(WR) AS R,
-    sumIf(WR, uniq_rows = 1) AS UNR
-FROM
-(
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    WHERE (ac % 4) = 0
-    GROUP BY
-        ac,
-        nw
-    UNION ALL
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    WHERE (ac % 4) = 1
-    GROUP BY
-        ac,
-        nw
-    UNION ALL
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    WHERE (ac % 4) = 2
-    GROUP BY
-        ac,
-        nw
-    UNION ALL
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    WHERE (ac % 4) = 3
-    GROUP BY
-        ac,
-        nw
-)
-GROUP BY nw
-ORDER BY nw ASC, R DESC
-LIMIT 10;
-0	2	0
-1	2	0
-2	2	0
diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql
deleted file mode 100644
index 3151b42f896..00000000000
--- a/tests/queries/0_stateless/02884_parallel_window_functions.sql
+++ /dev/null
@@ -1,119 +0,0 @@
-CREATE TABLE window_funtion_threading
-Engine = MergeTree
-ORDER BY (ac, nw)
-AS SELECT
-        toUInt64(toFloat32(number % 2) % 20000000) as ac,
-        toFloat32(1) as wg,        
-        toUInt16(toFloat32(number % 3) % 400) as nw
-FROM numbers_mt(10000000);
-
-SELECT count() FROM (EXPLAIN PIPELINE SELECT
-    nw,
-    sum(WR) AS R,
-    sumIf(WR, uniq_rows = 1) AS UNR
-FROM
-(
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    GROUP BY ac, nw
-)
-GROUP BY nw
-ORDER BY nw ASC, R DESC
-LIMIT 10) where explain ilike '%ScatterByPartitionTransform%' SETTINGS max_threads = 4;
-
--- { echoOn }
-
-SELECT
-    nw,
-    sum(WR) AS R,
-    sumIf(WR, uniq_rows = 1) AS UNR
-FROM
-(
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    GROUP BY ac, nw
-)
-GROUP BY nw
-ORDER BY nw ASC, R DESC
-LIMIT 10;
-
-SELECT
-    nw,
-    sum(WR) AS R,
-    sumIf(WR, uniq_rows = 1) AS UNR
-FROM
-(
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    GROUP BY ac, nw
-)
-GROUP BY nw
-ORDER BY nw ASC, R DESC
-LIMIT 10
-SETTINGS max_threads = 1;
-
-SELECT
-    nw,
-    sum(WR) AS R,
-    sumIf(WR, uniq_rows = 1) AS UNR
-FROM
-(
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    WHERE (ac % 4) = 0
-    GROUP BY
-        ac,
-        nw
-    UNION ALL
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    WHERE (ac % 4) = 1
-    GROUP BY
-        ac,
-        nw
-    UNION ALL
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    WHERE (ac % 4) = 2
-    GROUP BY
-        ac,
-        nw
-    UNION ALL
-    SELECT
-        uniq(nw) OVER (PARTITION BY ac) AS uniq_rows,
-        AVG(wg) AS WR,
-        ac,
-        nw
-    FROM window_funtion_threading
-    WHERE (ac % 4) = 3
-    GROUP BY
-        ac,
-        nw
-)
-GROUP BY nw
-ORDER BY nw ASC, R DESC
-LIMIT 10;

From 6cab0124cd51511baa151bb981c54305cc0c49f9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 06:02:44 +0300
Subject: [PATCH 155/213] Update SerializationString.cpp

---
 src/DataTypes/Serializations/SerializationString.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp
index 308bdce0507..a87c5e7d880 100644
--- a/src/DataTypes/Serializations/SerializationString.cpp
+++ b/src/DataTypes/Serializations/SerializationString.cpp
@@ -152,6 +152,7 @@ template <int UNROLL_TIMES>
 static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnString::Offsets & offsets, ReadBuffer & istr, size_t limit)
 {
     size_t offset = data.size();
+    /// Avoiding calling resize in a loop improves the performance.
     data.resize(std::max(data.capacity(), static_cast<size_t>(4096)));
 
     for (size_t i = 0; i < limit; ++i)

From 4240e48a5e0230316c928a5999646a37b10137b8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 04:55:16 +0100
Subject: [PATCH 156/213] Sending the logs better

---
 docker/test/clickbench/run.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index 7357fa6df86..e3c56ed9a75 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -53,4 +53,7 @@ set -x
 
 clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'"
 
+clickhouse-client -q "system flush logs" ||:
+stop_logs_replication
+
 echo -e "success\tClickBench finished" > /test_output/check_status.tsv

From b40e04a8beafb013e1dadd81971764313bb6d5fb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 05:04:37 +0100
Subject: [PATCH 157/213] Export the logs

---
 docker/test/clickbench/run.sh | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index e3c56ed9a75..b3b4ea85e24 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -14,6 +14,15 @@ dpkg -i package_folder/clickhouse-client_*.deb
 mkdir /dev/shm/clickhouse
 chown clickhouse:clickhouse /dev/shm/clickhouse
 
+# Allow introspection functions, needed for sending the logs
+echo "
+profiles:
+    default:
+        allow_introspection_functions: 1
+" > /etc/clickhouse-server/allow_introspection_functions.yaml
+
+config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
+
 clickhouse start
 
 # Wait for the server to start, but not for too long.

From 02720cde010ea7a3a8ace0a4b97def2222cc8bc3 Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Tue, 12 Dec 2023 04:06:43 +0000
Subject: [PATCH 158/213] Fix

---
 src/Analyzer/Passes/QueryAnalysisPass.cpp     | 25 ++++++++++++-----
 .../replaceForPositionalArguments.cpp         | 27 ++++++++++++++-----
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index a52a0fac232..a3b461f32ea 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -2153,21 +2153,32 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_
             node_to_replace = &sort_node->getExpression();
 
         auto * constant_node = (*node_to_replace)->as<ConstantNode>();
+
         if (!constant_node
             || (constant_node->getValue().getType() != Field::Types::UInt64 && constant_node->getValue().getType() != Field::Types::Int64))
             continue;
 
-        auto positional_argument_number = constant_node->getValue().get<Int64>();
-        if (positional_argument_number == 0 || static_cast<size_t>(std::abs(positional_argument_number)) > projection_nodes.size())
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+        UInt64 pos;
+        if (constant_node->getValue().getType() == Field::Types::UInt64)
+        {
+            pos = constant_node->getValue().get<UInt64>();
+        }
+        else // Int64
+        {
+            auto value = constant_node->getValue().get<Int64>();
+            pos = value > 0 ? value : projection_nodes.size() + value + 1;
+        }
+
+
+        if (!pos || pos > projection_nodes.size())
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
                 "Positional argument number {} is out of bounds. Expected in range [1, {}]. In scope {}",
-                positional_argument_number,
+                pos,
                 projection_nodes.size(),
                 scope.scope_node->formatASTForErrorMessage());
 
-        positional_argument_number
-            = (positional_argument_number > 0) ? --positional_argument_number : projection_nodes.size() + positional_argument_number;
-        *node_to_replace = projection_nodes[positional_argument_number];
+        *node_to_replace = projection_nodes[--pos];
     }
 }
 
diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp
index f5a77dacd4c..8306da17f52 100644
--- a/src/Interpreters/replaceForPositionalArguments.cpp
+++ b/src/Interpreters/replaceForPositionalArguments.cpp
@@ -30,15 +30,28 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel
     if (which != Field::Types::UInt64 && which != Field::Types::Int64)
         return false;
 
-    auto pos = ast_literal->value.get<Int64>();
-    if (!pos || static_cast<size_t>(std::abs(pos)) > columns.size())
-        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                "Positional argument out of bounds: {} (expected in range [1, {}]",
-                        pos, columns.size());
+    UInt64 pos;
 
-    pos = (pos > 0) ? --pos : columns.size() + pos;
+    if (which == Field::Types::UInt64)
+    {
+        pos = ast_literal->value.get<UInt64>();
+    }
+    else if (which == Field::Types::Int64)
+    {
+        auto value = ast_literal->value.get<Int64>();
+        pos = value > 0 ? value : columns.size() + value + 1;
+    }
+    else
+    {
+        return false;
+    }
 
-    const auto & column = columns[pos];
+
+    if (!pos || pos > columns.size())
+        throw Exception(
+            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Positional argument out of bounds: {} (expected in range [1, {}]", pos, columns.size());
+
+    const auto & column = columns[--pos];
     if (typeid_cast<const ASTIdentifier *>(column.get()) || typeid_cast<const ASTLiteral *>(column.get()))
     {
         argument = column->clone();

From 9789c2caa214e17bb8323c9d67b6cc62c56eb350 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 05:48:09 +0100
Subject: [PATCH 159/213] Review fixes

---
 .github/workflows/pull_request.yml            |  8 ++-----
 docker/test/clickbench/run.sh                 |  2 ++
 src/Common/parseRemoteDescription.cpp         |  2 +-
 src/Common/parseRemoteDescription.h           |  4 ++++
 .../Cached/registerDiskCache.cpp              |  3 +--
 tests/ci/clickbench.py                        | 22 +------------------
 6 files changed, 11 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 5bb62b04c32..0be703e1196 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -709,22 +709,18 @@ jobs:
     with:
       test_name: ClickBench (amd64)
       runner_type: func-tester
-      additional_envs: |
-        KILL_TIMEOUT=1800
       run_command: |
         cd "$REPO_COPY/tests/ci"
-        python3 clickbench.py "$CHECK_NAME" "$KILL_TIMEOUT"
+        python3 clickbench.py "$CHECK_NAME"
   ClickBenchAarch64:
     needs: [BuilderDebAarch64]
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: ClickBench (aarch64)
       runner_type: func-tester-aarch64
-      additional_envs: |
-        KILL_TIMEOUT=1800
       run_command: |
         cd "$REPO_COPY/tests/ci"
-        python3 clickbench.py "$CHECK_NAME" "$KILL_TIMEOUT"
+        python3 clickbench.py "$CHECK_NAME"
 ##############################################################################################
 ######################################### STRESS TESTS #######################################
 ##############################################################################################
diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index b3b4ea85e24..a344e0ec27c 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -65,4 +65,6 @@ clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'h
 clickhouse-client -q "system flush logs" ||:
 stop_logs_replication
 
+mv /var/log/clickhouse-server/* /test_output/
+
 echo -e "success\tClickBench finished" > /test_output/check_status.tsv
diff --git a/src/Common/parseRemoteDescription.cpp b/src/Common/parseRemoteDescription.cpp
index 8ea3f4a0aa5..7b2045b9de1 100644
--- a/src/Common/parseRemoteDescription.cpp
+++ b/src/Common/parseRemoteDescription.cpp
@@ -184,7 +184,7 @@ std::vector<std::pair<String, uint16_t>> parseRemoteDescriptionForExternalDataba
         }
         else
         {
-            result.emplace_back(std::make_pair(address.substr(0, colon), DB::parseFromString<UInt16>(address.substr(colon + 1))));
+            result.emplace_back(std::make_pair(address.substr(0, colon), parseFromString<UInt16>(address.substr(colon + 1))));
         }
     }
 
diff --git a/src/Common/parseRemoteDescription.h b/src/Common/parseRemoteDescription.h
index d97558c4728..12435bc68a0 100644
--- a/src/Common/parseRemoteDescription.h
+++ b/src/Common/parseRemoteDescription.h
@@ -1,8 +1,12 @@
 #pragma once
+
 #include <base/types.h>
 #include <vector>
+
+
 namespace DB
 {
+
 /* Parse a string that generates shards and replicas. Separator - one of two characters '|' or ','
  *  depending on whether shards or replicas are generated.
  * For example:
diff --git a/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp b/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp
index 182326bbdc3..99fd2c932af 100644
--- a/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp
+++ b/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp
@@ -6,9 +6,8 @@
 #include <Common/filesystemHelpers.h>
 #include <Common/NamedCollections/NamedCollections.h>
 #include <Disks/DiskFactory.h>
-#include <Disks/ObjectStorages/Cached/CachedObjectStorage.h>
 #include <Disks/ObjectStorages/DiskObjectStorage.h>
-#include <Interpreters/Context.h>
+
 
 namespace DB
 {
diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 359c10eeb9d..061d36f02fa 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -23,7 +23,6 @@ from commit_status_helper import (
     get_commit,
     override_status,
     post_commit_status,
-    post_commit_status_to_file,
     update_mergeable_check,
 )
 from docker_pull_helper import DockerImage, get_image_with_version
@@ -113,13 +112,6 @@ def process_results(
 def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("check_name")
-    parser.add_argument("kill_timeout", type=int)
-    parser.add_argument(
-        "--post-commit-status",
-        default="commit_status",
-        choices=["commit_status", "file"],
-        help="Where to public post commit status",
-    )
     return parser.parse_args()
 
 
@@ -214,19 +206,7 @@ def main():
     )
 
     print(f"::notice:: {check_name} Report url: {report_url}")
-    if args.post_commit_status == "commit_status":
-        post_commit_status(commit, state, report_url, description, check_name, pr_info)
-    elif args.post_commit_status == "file":
-        post_commit_status_to_file(
-            post_commit_path,
-            description,
-            state,
-            report_url,
-        )
-    else:
-        raise Exception(
-            f'Unknown post_commit_status option "{args.post_commit_status}"'
-        )
+    post_commit_status(commit, state, report_url, description, check_name, pr_info)
 
     prepared_events = prepare_tests_results_for_clickhouse(
         pr_info,

From 928659b1ae127bcfb9ec166ada6109540a7af3c4 Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Tue, 12 Dec 2023 05:36:07 +0000
Subject: [PATCH 160/213] update test

---
 tests/queries/0_stateless/01162_strange_mutations.sh     | 2 +-
 tests/queries/0_stateless/01798_having_push_down.sql     | 3 ++-
 tests/queries/0_stateless/02932_group_by_null_fuzzer.sql | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01162_strange_mutations.sh b/tests/queries/0_stateless/01162_strange_mutations.sh
index eea9ea5f7e5..f6b31847c1e 100755
--- a/tests/queries/0_stateless/01162_strange_mutations.sh
+++ b/tests/queries/0_stateless/01162_strange_mutations.sh
@@ -28,7 +28,7 @@ do
     $CLICKHOUSE_CLIENT -q "CREATE TABLE test ENGINE=$engine AS SELECT number + 100 AS n, 0 AS test FROM numbers(50)" 2>&1| grep -Ev "Removing leftovers from table|removed by another replica"
     $CLICKHOUSE_CLIENT -q "select count(), sum(n), sum(test) from test"
     if [[ $engine == *"ReplicatedMergeTree"* ]]; then
-        $CLICKHOUSE_CLIENT -q "ALTER TABLE test
+        $CLICKHOUSE_CLIENT --enable_positional_arguments=0 -q "ALTER TABLE test
             UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 'dummy')[n - 99] WHERE 1" 2>&1| grep -Fa "DB::Exception: " | grep -Fv "statement with subquery may be nondeterministic"
         $CLICKHOUSE_CLIENT --allow_nondeterministic_mutations=1 --mutations_sync=1 -q "ALTER TABLE test
                     UPDATE test = (SELECT groupArray(id) FROM t1)[n - 99] WHERE 1"
diff --git a/tests/queries/0_stateless/01798_having_push_down.sql b/tests/queries/0_stateless/01798_having_push_down.sql
index b3a77c8f5b5..c0c3447f5ab 100644
--- a/tests/queries/0_stateless/01798_having_push_down.sql
+++ b/tests/queries/0_stateless/01798_having_push_down.sql
@@ -8,11 +8,12 @@ SELECT sum(c0 = 0), min(c0 + 1), sum(c0 + 2) FROM t_having
 GROUP BY c0 HAVING c0 = 0
 SETTINGS enable_optimize_predicate_expression=0;
 
+SET enable_positional_arguments=0;
+
 SELECT c0 + -1, sum(intDivOrZero(intDivOrZero(NULL, NULL), '2'), intDivOrZero(10000000000., intDivOrZero(intDivOrZero(intDivOrZero(NULL, NULL), 10), NULL))) FROM t_having GROUP BY c0 = 2, c0 = 10, intDivOrZero(intDivOrZero(intDivOrZero(NULL, NULL), NULL), NULL), c0 HAVING c0 = 2 SETTINGS enable_optimize_predicate_expression = 0;
 
 SELECT sum(c0 + 257) FROM t_having GROUP BY c0 = -9223372036854775808, NULL, -2147483649, c0 HAVING c0 = -9223372036854775808 SETTINGS enable_optimize_predicate_expression = 0;
 
-SET enable_positional_arguments=0;
 SELECT c0 + -2, c0 + -9223372036854775807, c0 = NULL FROM t_having GROUP BY c0 = 0.9998999834060669, 1023, c0 HAVING c0 = 0.9998999834060669 SETTINGS enable_optimize_predicate_expression = 0;
 
 DROP TABLE t_having;
diff --git a/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql b/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql
index 0c28c120d40..603c7783ef8 100644
--- a/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql
+++ b/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql
@@ -1,5 +1,6 @@
 -- https://github.com/ClickHouse/ClickHouse/issues/43202
 -- Queries are generated by the fuzzer, so don't expect them to make sense
+SET enable_positional_arguments=0;
 SELECT NULL, '' FROM (SELECT toNullable(''), NULL AS key GROUP BY GROUPING SETS ((NULL))) AS s1 ALL LEFT JOIN (SELECT '' AS key, NULL AS value GROUP BY GROUPING SETS (('')) WITH TOTALS UNION ALL SELECT NULL AS key, toNullable(NULL) AS value GROUP BY '', NULL, '' WITH TOTALS) AS s2 USING (key);
 SELECT NULL GROUP BY NULL WITH TOTALS;
 SELECT 1048575, NULL, b FROM (SELECT '25.5' AS a, NULL, NULL AS b GROUP BY GROUPING SETS ((0.0001)) WITH TOTALS) AS js1 ANY RIGHT JOIN (SELECT NULL AS a, NULL AS b WHERE NULL GROUP BY NULL, -9223372036854775807 WITH CUBE WITH TOTALS UNION ALL SELECT NULL AS a, NULL AS b GROUP BY 1, '21474836.46' WITH TOTALS) AS js2 USING (a, b) ORDER BY nan DESC NULLS LAST, '9223372036854775807' DESC NULLS LAST, a ASC NULLS LAST;

From 8a68a4247e164dd822460fc73e03f4f2ad8b8a2c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 07:58:54 +0100
Subject: [PATCH 161/213] Style

---
 tests/ci/clickbench.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 061d36f02fa..096309eaf92 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -124,7 +124,6 @@ def main():
     temp_path.mkdir(parents=True, exist_ok=True)
 
     reports_path = Path(REPORTS_PATH)
-    post_commit_path = temp_path / "clickbench_status.tsv"
 
     args = parse_args()
     check_name = args.check_name

From be9fac3a55392da05dba36b7a8adc949ae5da593 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <thevar1able@users.noreply.github.com>
Date: Tue, 12 Dec 2023 09:26:06 +0100
Subject: [PATCH 162/213] Lint includes

Co-authored-by: alesapin <alesapin@clickhouse.com>
---
 src/Server/KeeperReadinessHandler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Server/KeeperReadinessHandler.h b/src/Server/KeeperReadinessHandler.h
index caa59098427..00b51b886f9 100644
--- a/src/Server/KeeperReadinessHandler.h
+++ b/src/Server/KeeperReadinessHandler.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "config.h"
+#include <config.h>
 
 #if USE_NURAFT
 

From 1f9c7336a97b88a070d0ce783ff5e687c8abcfb7 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin@clickhouse.com>
Date: Tue, 12 Dec 2023 09:32:16 +0100
Subject: [PATCH 163/213] Fix test helper naming

---
 tests/integration/helpers/keeper_utils.py          | 2 +-
 tests/integration/test_keeper_http_control/test.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py
index e07bce901d2..39fa0d0f074 100644
--- a/tests/integration/helpers/keeper_utils.py
+++ b/tests/integration/helpers/keeper_utils.py
@@ -279,7 +279,7 @@ def get_leader(cluster, nodes):
     raise Exception("No leader in Keeper cluster.")
 
 
-def get_follower(cluster, nodes):
+def get_any_follower(cluster, nodes):
     for node in nodes:
         if is_follower(cluster, node):
             return node
diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py
index b415a03a5c4..8bffaa6763c 100644
--- a/tests/integration/test_keeper_http_control/test.py
+++ b/tests/integration/test_keeper_http_control/test.py
@@ -51,7 +51,7 @@ def test_http_readiness(started_cluster):
     assert readiness_data["details"]["leader"] == True
     assert readiness_data["details"]["follower"] == False
 
-    follower = keeper_utils.get_follower(cluster, [node1, node2, node3])
+    follower = keeper_utils.get_any_follower(cluster, [node1, node2, node3])
     response = requests.get(
         "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182)
     )

From 69a022f72a35b214b8305ae2cd5bca90dcb6f099 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin@clickhouse.com>
Date: Tue, 12 Dec 2023 09:42:32 +0100
Subject: [PATCH 164/213] Add `observer` status

---
 src/Server/KeeperReadinessHandler.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp
index 37afd8e9898..148a209fb12 100644
--- a/src/Server/KeeperReadinessHandler.cpp
+++ b/src/Server/KeeperReadinessHandler.cpp
@@ -25,13 +25,15 @@ void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTP
     {
         auto is_leader = keeper_dispatcher->isLeader();
         auto is_follower = keeper_dispatcher->isFollower() && keeper_dispatcher->hasLeader();
+        auto is_observer = keeper_dispatcher->isObserver() && keeper_dispatcher->hasLeader();
 
-        auto status = is_leader || is_follower;
+        auto status = is_leader || is_follower || is_observer;
 
         Poco::JSON::Object json, details;
 
         details.set("leader", is_leader);
         details.set("follower", is_follower);
+        details.set("observer", is_observer);
         json.set("details", details);
         json.set("status", status ? "ok": "fail");
 

From efa2e0341ab66004ce3c6695b43f5d15213941c7 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 12 Dec 2023 08:45:25 +0000
Subject: [PATCH 165/213] Docs: Fix typo

---
 docs/en/development/developer-instruction.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md
index 645756a46c7..31346c77949 100644
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@@ -72,7 +72,7 @@ You can also add original ClickHouse repo address to your local repository to pu
 After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`.
 
 :::note 
-Instructions below assume you are building on Linux. If you are cross-compiling or using building on macOS, please also check for operating system and architecture specific guides, such as building [on macOS for macOS](build-osx.md), [on Linux for macOS](build-cross-osx.md), [on Linux for Linux/RISC-V](build-cross-riscv.md) and so on.
+Instructions below assume you are building on Linux. If you are cross-compiling or building on macOS, please also check for operating system and architecture specific guides, such as building [on macOS for macOS](build-osx.md), [on Linux for macOS](build-cross-osx.md), [on Linux for Linux/RISC-V](build-cross-riscv.md) and so on.
 :::
 
 ## Build System {#build-system}

From ea123ed5c143ee221fb372d462ede73a1492c317 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin@clickhouse.com>
Date: Tue, 12 Dec 2023 11:35:01 +0100
Subject: [PATCH 166/213] Change response structure

---
 src/Server/KeeperReadinessHandler.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp
index 148a209fb12..ed972055aee 100644
--- a/src/Server/KeeperReadinessHandler.cpp
+++ b/src/Server/KeeperReadinessHandler.cpp
@@ -27,15 +27,16 @@ void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTP
         auto is_follower = keeper_dispatcher->isFollower() && keeper_dispatcher->hasLeader();
         auto is_observer = keeper_dispatcher->isObserver() && keeper_dispatcher->hasLeader();
 
+        auto data = keeper_dispatcher->getKeeper4LWInfo();
+
         auto status = is_leader || is_follower || is_observer;
 
         Poco::JSON::Object json, details;
 
-        details.set("leader", is_leader);
-        details.set("follower", is_follower);
-        details.set("observer", is_observer);
+        details.set("role", data.getRole());
+        details.set("hasLeader", keeper_dispatcher->hasLeader());
         json.set("details", details);
-        json.set("status", status ? "ok": "fail");
+        json.set("status", status ? "ok" : "fail");
 
         std::ostringstream oss;     // STYLE_CHECK_ALLOW_STD_STRING_STREAM
         oss.exceptions(std::ios::failbit);

From 8fe2cd1a7effaca3676b44158004fe5747a8bbc2 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin@clickhouse.com>
Date: Tue, 12 Dec 2023 11:35:17 +0100
Subject: [PATCH 167/213] Update tests

---
 .../test_keeper_http_control/test.py          | 44 ++++++++++++-------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py
index 8bffaa6763c..ed86e06c626 100644
--- a/tests/integration/test_keeper_http_control/test.py
+++ b/tests/integration/test_keeper_http_control/test.py
@@ -1,12 +1,13 @@
 #!/usr/bin/env python3
 
 import os
+import time
 import pytest
 import requests
 
-import helpers.keeper_utils as keeper_utils
-from kazoo.client import KazooClient
 from helpers.cluster import ClickHouseCluster
+from helpers.network import PartitionManager
+import helpers.keeper_utils as keeper_utils
 
 cluster = ClickHouseCluster(__file__)
 CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs")
@@ -30,16 +31,7 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
-
-def get_fake_zk(node, timeout=30.0):
-    _fake_zk_instance = KazooClient(
-        hosts=cluster.get_instance_ip(node.name) + ":9181", timeout=timeout
-    )
-    _fake_zk_instance.start()
-    return _fake_zk_instance
-
-
-def test_http_readiness(started_cluster):
+def test_http_readiness_basic_responses(started_cluster):
     leader = keeper_utils.get_leader(cluster, [node1, node2, node3])
     response = requests.get(
         "http://{host}:{port}/ready".format(host=leader.ip_address, port=9182)
@@ -48,8 +40,7 @@ def test_http_readiness(started_cluster):
 
     readiness_data = response.json()
     assert readiness_data["status"] == "ok"
-    assert readiness_data["details"]["leader"] == True
-    assert readiness_data["details"]["follower"] == False
+    assert readiness_data["details"]["role"] == "leader"
 
     follower = keeper_utils.get_any_follower(cluster, [node1, node2, node3])
     response = requests.get(
@@ -59,5 +50,26 @@ def test_http_readiness(started_cluster):
 
     readiness_data = response.json()
     assert readiness_data["status"] == "ok"
-    assert readiness_data["details"]["leader"] == False
-    assert readiness_data["details"]["follower"] == True
+    assert readiness_data["details"]["role"] == "follower"
+    assert readiness_data["details"]["hasLeader"] == True
+
+def test_http_readiness_partitioned_cluster(started_cluster):
+    with PartitionManager() as pm:
+        leader = keeper_utils.get_leader(cluster, [node1, node2, node3])
+        follower = keeper_utils.get_any_follower(cluster, [node1, node2, node3])
+
+        pm.partition_instances(
+            leader, follower
+        )
+        time.sleep(3)
+
+        response = requests.get(
+            "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182)
+        )
+        print(response.json())
+        assert response.status_code == 503
+
+        readiness_data = response.json()
+        assert readiness_data["status"] == "fail"
+        assert readiness_data["details"]["role"] == "follower"
+        assert readiness_data["details"]["hasLeader"] == False

From b49452fb45e7b68575411e1bd9479d2c7e9531cb Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Tue, 12 Dec 2023 10:44:55 +0000
Subject: [PATCH 168/213] Automatic style fix

---
 tests/integration/test_keeper_http_control/test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py
index ed86e06c626..49d2f70e6b0 100644
--- a/tests/integration/test_keeper_http_control/test.py
+++ b/tests/integration/test_keeper_http_control/test.py
@@ -31,6 +31,7 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
+
 def test_http_readiness_basic_responses(started_cluster):
     leader = keeper_utils.get_leader(cluster, [node1, node2, node3])
     response = requests.get(
@@ -53,14 +54,13 @@ def test_http_readiness_basic_responses(started_cluster):
     assert readiness_data["details"]["role"] == "follower"
     assert readiness_data["details"]["hasLeader"] == True
 
+
 def test_http_readiness_partitioned_cluster(started_cluster):
     with PartitionManager() as pm:
         leader = keeper_utils.get_leader(cluster, [node1, node2, node3])
         follower = keeper_utils.get_any_follower(cluster, [node1, node2, node3])
 
-        pm.partition_instances(
-            leader, follower
-        )
+        pm.partition_instances(leader, follower)
         time.sleep(3)
 
         response = requests.get(

From 511cfb393dcc7765c30a0ff50d909ca88bbfa35a Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin@clickhouse.com>
Date: Tue, 12 Dec 2023 12:27:49 +0100
Subject: [PATCH 169/213] Remove `time.sleep` from test

---
 tests/integration/test_keeper_http_control/test.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py
index 49d2f70e6b0..65dc5bea909 100644
--- a/tests/integration/test_keeper_http_control/test.py
+++ b/tests/integration/test_keeper_http_control/test.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 
 import os
-import time
 import pytest
 import requests
 
@@ -61,7 +60,7 @@ def test_http_readiness_partitioned_cluster(started_cluster):
         follower = keeper_utils.get_any_follower(cluster, [node1, node2, node3])
 
         pm.partition_instances(leader, follower)
-        time.sleep(3)
+        keeper_utils.wait_until_quorum_lost(cluster, follower)
 
         response = requests.get(
             "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182)

From cfe6bc2cc547d91759b69f071d4ad96cec20ab87 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev" <felixoid@clickhouse.com>
Date: Thu, 7 Dec 2023 00:57:01 +0100
Subject: [PATCH 170/213] Replace len by sum for generator

---
 tests/ci/workflow_jobs_lambda/app.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/ci/workflow_jobs_lambda/app.py b/tests/ci/workflow_jobs_lambda/app.py
index 6931835f601..4210ca22da9 100644
--- a/tests/ci/workflow_jobs_lambda/app.py
+++ b/tests/ci/workflow_jobs_lambda/app.py
@@ -160,9 +160,7 @@ def handler(event: dict, context: Any) -> dict:
         steps = 0
     else:
         # We record only finished steps
-        steps = len(
-            [step for step in wf_job["steps"] if step["conclusion"] is not None]
-        )
+        steps = sum(1 for st in wf_job["steps"] if st["conclusion"] is not None)
 
     workflow_job = WorkflowJob(
         wf_job["id"],

From 7ff30211128d08a82cd830d4d1ed16321d58fa47 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 14:12:10 +0100
Subject: [PATCH 171/213] Fix Docker

---
 tests/ci/ci_config.py  | 2 +-
 tests/ci/clickbench.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index e9f75d66b2e..de2ba3dc1ce 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -512,7 +512,7 @@ CHECK_DESCRIPTIONS = [
     CheckDescription(
         "ClickBench",
         "Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table",
-        lambda x: x.startswith("Upgrade check ("),
+        lambda x: x.startswith("ClickBench"),
     ),
     CheckDescription(
         "Falback for unknown",
diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py
index 096309eaf92..2ea5e39ce8e 100644
--- a/tests/ci/clickbench.py
+++ b/tests/ci/clickbench.py
@@ -53,7 +53,7 @@ def get_run_command(
     env_str = " ".join(envs)
 
     return (
-        f"docker run --volume={builds_path}:/package_folder "
+        f"docker run --shm-size=16g --volume={builds_path}:/package_folder "
         f"{ci_logs_args}"
         f"--volume={result_path}:/test_output "
         f"--volume={server_log_path}:/var/log/clickhouse-server "

From f1a330e95de085781e75840891060528c9ec301d Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 12 Dec 2023 13:46:29 +0000
Subject: [PATCH 172/213] Add a comment

---
 src/Processors/Transforms/AggregatingTransform.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp
index 7b1d51bb320..0f0fa38727d 100644
--- a/src/Processors/Transforms/AggregatingTransform.cpp
+++ b/src/Processors/Transforms/AggregatingTransform.cpp
@@ -721,6 +721,8 @@ void AggregatingTransform::initGenerate()
 
     if (many_data->num_finished.fetch_add(1) + 1 < many_data->variants.size())
     {
+        /// Note: we reset aggregation state here to release memory earlier.
+        /// It might cause extra memory usage for complex queries otherwise.
         many_data.reset();
         return;
     }

From e77cb18d184c3c06d4765d5cb320d8b4920b9f60 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Tue, 12 Dec 2023 15:27:42 +0100
Subject: [PATCH 173/213] Better test

---
 tests/integration/helpers/postgres_utility.py |  4 +-
 .../test.py                                   | 55 ++++++++++---------
 2 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/tests/integration/helpers/postgres_utility.py b/tests/integration/helpers/postgres_utility.py
index 4bf549174e8..690a833f37f 100644
--- a/tests/integration/helpers/postgres_utility.py
+++ b/tests/integration/helpers/postgres_utility.py
@@ -280,9 +280,9 @@ class PostgresManager:
             f"INSERT INTO {database_name}.{table_name} SELECT number, number from numbers(50)"
         )
 
-    def create_and_fill_postgres_tables(self, tables_num, numbers=50, database_name=""):
+    def create_and_fill_postgres_tables(self, tables_num, numbers=50, database_name="", table_name_base="postgresql_replica"):
         for i in range(tables_num):
-            table_name = f"postgresql_replica_{i}"
+            table_name = f"{table_name_base}_{i}"
             create_postgres_table(self.cursor, table_name, database_name)
             if numbers > 0:
                 db = self.database_or_default(database_name)
diff --git a/tests/integration/test_postgresql_replica_database_engine_1/test.py b/tests/integration/test_postgresql_replica_database_engine_1/test.py
index c118080a572..e4cce96244f 100644
--- a/tests/integration/test_postgresql_replica_database_engine_1/test.py
+++ b/tests/integration/test_postgresql_replica_database_engine_1/test.py
@@ -393,18 +393,19 @@ def test_table_schema_changes(started_cluster):
 
 
 def test_many_concurrent_queries(started_cluster):
+    table = "test_many_conc"
     query_pool = [
-        "DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;",
-        "UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;",
-        "DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;",
-        "UPDATE postgresql_replica_{} SET value = value*5 WHERE key % 2 = 1;",
-        "DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;",
-        "UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;",
-        "DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;",
-        "UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;",
-        "DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;",
-        "UPDATE postgresql_replica_{} SET value = value + 2  WHERE key % 3 = 1;",
-        "DELETE FROM postgresql_replica_{} WHERE value%5 = 0;",
+        "DELETE FROM {} WHERE (value*value) % 3 = 0;",
+        "UPDATE {} SET value = value - 125 WHERE key % 2 = 0;",
+        "DELETE FROM {} WHERE key % 10 = 0;",
+        "UPDATE {} SET value = value*5 WHERE key % 2 = 1;",
+        "DELETE FROM {} WHERE value % 2 = 0;",
+        "UPDATE {} SET value = value + 2000 WHERE key % 5 = 0;",
+        "DELETE FROM {} WHERE value % 3 = 0;",
+        "UPDATE {} SET value = value * 2 WHERE key % 3 = 0;",
+        "DELETE FROM {} WHERE value % 9 = 2;",
+        "UPDATE {} SET value = value + 2  WHERE key % 3 = 1;",
+        "DELETE FROM {} WHERE value%5 = 0;",
     ]
 
     NUM_TABLES = 5
@@ -415,7 +416,7 @@ def test_many_concurrent_queries(started_cluster):
         database=True,
     )
     cursor = conn.cursor()
-    pg_manager.create_and_fill_postgres_tables(NUM_TABLES, numbers=10000)
+    pg_manager.create_and_fill_postgres_tables(NUM_TABLES, numbers=10000, table_name_base=table)
 
     def attack(thread_id):
         print("thread {}".format(thread_id))
@@ -423,17 +424,19 @@ def test_many_concurrent_queries(started_cluster):
         for i in range(20):
             query_id = random.randrange(0, len(query_pool) - 1)
             table_id = random.randrange(0, 5)  # num tables
+            random_table_name = f"{table}_{table_id}"
+            table_name = f"{table}_{thread_id}"
 
             # random update / delete query
-            cursor.execute(query_pool[query_id].format(table_id))
-            print("table {} query {} ok".format(table_id, query_id))
+            cursor.execute(query_pool[query_id].format(random_table_name))
+            print("table {} query {} ok".format(random_table_name, query_id))
 
             # allow some thread to do inserts (not to violate key constraints)
             if thread_id < 5:
                 print("try insert table {}".format(thread_id))
                 instance.query(
-                    "INSERT INTO postgres_database.postgresql_replica_{} SELECT {}*10000*({} +  number), number from numbers(1000)".format(
-                        i, thread_id, k
+                    "INSERT INTO postgres_database.{} SELECT {}*10000*({} +  number), number from numbers(1000)".format(
+                        table_name, thread_id, k
                     )
                 )
                 k += 1
@@ -443,8 +446,8 @@ def test_many_concurrent_queries(started_cluster):
                     # also change primary key value
                     print("try update primary key {}".format(thread_id))
                     cursor.execute(
-                        "UPDATE postgresql_replica_{} SET key=key%100000+100000*{} WHERE key%{}=0".format(
-                            thread_id, i + 1, i + 1
+                        "UPDATE {table}_{} SET key=key%100000+100000*{} WHERE key%{}=0".format(
+                            table_name, i + 1, i + 1
                         )
                     )
                     print("update primary key {} ok".format(thread_id))
@@ -467,25 +470,25 @@ def test_many_concurrent_queries(started_cluster):
     n[0] = 50000
     for table_id in range(NUM_TABLES):
         n[0] += 1
+        table_name = f"{table}_{table_id}"
         instance.query(
-            "INSERT INTO postgres_database.postgresql_replica_{} SELECT {} +  number, number from numbers(5000)".format(
-                table_id, n[0]
+            "INSERT INTO postgres_database.{} SELECT {} +  number, number from numbers(5000)".format(
+                table_name, n[0]
             )
         )
-        # cursor.execute("UPDATE postgresql_replica_{} SET key=key%100000+100000*{} WHERE key%{}=0".format(table_id, table_id+1, table_id+1))
+        # cursor.execute("UPDATE {table}_{} SET key=key%100000+100000*{} WHERE key%{}=0".format(table_id, table_id+1, table_id+1))
 
     for thread in threads:
         thread.join()
 
     for i in range(NUM_TABLES):
-        check_tables_are_synchronized(instance, "postgresql_replica_{}".format(i))
+        table_name = f"{table}_{i}"
+        check_tables_are_synchronized(instance, table_name)
         count1 = instance.query(
-            "SELECT count() FROM postgres_database.postgresql_replica_{}".format(i)
+            "SELECT count() FROM postgres_database.{}".format(table_name)
         )
         count2 = instance.query(
-            "SELECT count() FROM (SELECT * FROM test_database.postgresql_replica_{})".format(
-                i
-            )
+            "SELECT count() FROM (SELECT * FROM test_database.{})".format(table_name)
         )
         assert int(count1) == int(count2)
         print(count1, count2)

From f2336ff0253703c755dae8f53cc7c0604fb7f450 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Tue, 12 Dec 2023 14:43:44 +0000
Subject: [PATCH 174/213] Automatic style fix

---
 tests/integration/helpers/postgres_utility.py             | 8 +++++++-
 .../test_postgresql_replica_database_engine_1/test.py     | 4 +++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/integration/helpers/postgres_utility.py b/tests/integration/helpers/postgres_utility.py
index 690a833f37f..468c3b3bb63 100644
--- a/tests/integration/helpers/postgres_utility.py
+++ b/tests/integration/helpers/postgres_utility.py
@@ -280,7 +280,13 @@ class PostgresManager:
             f"INSERT INTO {database_name}.{table_name} SELECT number, number from numbers(50)"
         )
 
-    def create_and_fill_postgres_tables(self, tables_num, numbers=50, database_name="", table_name_base="postgresql_replica"):
+    def create_and_fill_postgres_tables(
+        self,
+        tables_num,
+        numbers=50,
+        database_name="",
+        table_name_base="postgresql_replica",
+    ):
         for i in range(tables_num):
             table_name = f"{table_name_base}_{i}"
             create_postgres_table(self.cursor, table_name, database_name)
diff --git a/tests/integration/test_postgresql_replica_database_engine_1/test.py b/tests/integration/test_postgresql_replica_database_engine_1/test.py
index e4cce96244f..2e0a597f885 100644
--- a/tests/integration/test_postgresql_replica_database_engine_1/test.py
+++ b/tests/integration/test_postgresql_replica_database_engine_1/test.py
@@ -416,7 +416,9 @@ def test_many_concurrent_queries(started_cluster):
         database=True,
     )
     cursor = conn.cursor()
-    pg_manager.create_and_fill_postgres_tables(NUM_TABLES, numbers=10000, table_name_base=table)
+    pg_manager.create_and_fill_postgres_tables(
+        NUM_TABLES, numbers=10000, table_name_base=table
+    )
 
     def attack(thread_id):
         print("thread {}".format(thread_id))

From 7142eacad3c8d2e793f21a36bb58d94d6d9b5656 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 12 Dec 2023 17:54:26 +0300
Subject: [PATCH 175/213] SerializationString reduce memory usage

---
 src/DataTypes/Serializations/SerializationString.cpp    | 2 +-
 tests/queries/0_stateless/01926_order_by_desc_limit.sql | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp
index a87c5e7d880..788ff429088 100644
--- a/src/DataTypes/Serializations/SerializationString.cpp
+++ b/src/DataTypes/Serializations/SerializationString.cpp
@@ -175,7 +175,7 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
         offsets.push_back(offset);
 
         if (unlikely(offset > data.size()))
-            data.resize(roundUpToPowerOfTwoOrZero(std::max(offset, data.size() * 2)));
+            data.resize_exact(roundUpToPowerOfTwoOrZero(std::max(offset, data.size() * 2)));
 
         if (size)
         {
diff --git a/tests/queries/0_stateless/01926_order_by_desc_limit.sql b/tests/queries/0_stateless/01926_order_by_desc_limit.sql
index 6854e6c1e84..a0047a2925a 100644
--- a/tests/queries/0_stateless/01926_order_by_desc_limit.sql
+++ b/tests/queries/0_stateless/01926_order_by_desc_limit.sql
@@ -11,9 +11,11 @@ SETTINGS index_granularity = 1024, index_granularity_bytes = '10Mi';
 INSERT INTO order_by_desc SELECT number, repeat('a', 1024) FROM numbers(1024 * 300);
 OPTIMIZE TABLE order_by_desc FINAL;
 
-SELECT s FROM order_by_desc ORDER BY u DESC LIMIT 10 FORMAT Null;
+SELECT s FROM order_by_desc ORDER BY u DESC LIMIT 10 FORMAT Null
+SETTINGS max_memory_usage = '400M';
 
-SELECT s FROM order_by_desc ORDER BY u LIMIT 10 FORMAT Null;
+SELECT s FROM order_by_desc ORDER BY u LIMIT 10 FORMAT Null
+SETTINGS max_memory_usage = '400M';
 
 SYSTEM FLUSH LOGS;
 

From 54676707412d586b1f97a773a22a540b7eb40d85 Mon Sep 17 00:00:00 2001
From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com>
Date: Tue, 12 Dec 2023 11:13:34 -0400
Subject: [PATCH 176/213] Mentions that APPEND or TRUNCATE should be used with
 INTO-OUTFILE.

---
 docs/en/sql-reference/statements/select/into-outfile.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md
index 352af16042a..985f5e25b05 100644
--- a/docs/en/sql-reference/statements/select/into-outfile.md
+++ b/docs/en/sql-reference/statements/select/into-outfile.md
@@ -26,6 +26,7 @@ SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION typ
 - The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it.
 - If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output.
 - If `APPEND` is mentioned in the query then the output is appended to an existing file. If compression is used, append cannot be used.
+- When writing to a file that already exists, `APPEND` or `TRUNCATE` must be used.
 
 **Example**
 

From af4f1abadd1563b00d5ff572142b59039eee76c3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 16:41:38 +0100
Subject: [PATCH 177/213] Fix error

---
 docker/test/clickbench/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index a344e0ec27c..471e1fd6714 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -21,7 +21,7 @@ profiles:
         allow_introspection_functions: 1
 " > /etc/clickhouse-server/allow_introspection_functions.yaml
 
-config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
+config_logs_export_cluster /etc/clickhouse-server/users.d/system_logs_export.yaml
 
 clickhouse start
 

From 12561c0c9b7abaee9c7bf0d469de909b70af9c84 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 16:42:31 +0100
Subject: [PATCH 178/213] Maybe better

---
 docker/test/clickbench/run.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index 471e1fd6714..5d2312c22c5 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -64,6 +64,7 @@ clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'h
 
 clickhouse-client -q "system flush logs" ||:
 stop_logs_replication
+clickhouse stop
 
 mv /var/log/clickhouse-server/* /test_output/
 

From 7196103be5bf2d937ca0422f01e21dfad94978ba Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Tue, 12 Dec 2023 17:41:16 +0100
Subject: [PATCH 179/213] Always recreate ephemeral "alive" node on
 reconnection.

---
 src/Backups/BackupCoordinationRemote.cpp  |  8 +++++---
 src/Backups/RestoreCoordinationRemote.cpp | 10 +++++-----
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp
index 7319b1aba58..b659887e0da 100644
--- a/src/Backups/BackupCoordinationRemote.cpp
+++ b/src/Backups/BackupCoordinationRemote.cpp
@@ -184,10 +184,12 @@ BackupCoordinationRemote::BackupCoordinationRemote(
             if (my_is_internal)
             {
                 String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host;
+
+                /// Delete the ephemeral node from the previous connection so we don't have to wait for keeper to do it automatically.
+                zk->tryRemove(alive_node_path);
+
                 zk->createAncestors(alive_node_path);
-                auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
-                if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
-                    throw zkutil::KeeperException::fromPath(code, alive_node_path);
+                zk->create(alive_node_path, "", zkutil::CreateMode::Ephemeral);
             }
         })
 {
diff --git a/src/Backups/RestoreCoordinationRemote.cpp b/src/Backups/RestoreCoordinationRemote.cpp
index 60a83c580f0..190634de4a9 100644
--- a/src/Backups/RestoreCoordinationRemote.cpp
+++ b/src/Backups/RestoreCoordinationRemote.cpp
@@ -43,12 +43,12 @@ RestoreCoordinationRemote::RestoreCoordinationRemote(
             if (my_is_internal)
             {
                 String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host;
-                auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
 
-                if (code == Coordination::Error::ZNODEEXISTS)
-                    zk->handleEphemeralNodeExistenceNoFailureInjection(alive_node_path, "");
-                else if (code != Coordination::Error::ZOK)
-                    throw zkutil::KeeperException::fromPath(code, alive_node_path);
+                /// Delete the ephemeral node from the previous connection so we don't have to wait for keeper to do it automatically.
+                zk->tryRemove(alive_node_path);
+
+                zk->createAncestors(alive_node_path);
+                zk->create(alive_node_path, "", zkutil::CreateMode::Ephemeral);
             }
         })
 {

From b31816cc901707ec87eb7c531487467146468d12 Mon Sep 17 00:00:00 2001
From: Ilya Golshtein <igolshtein@altinity.com>
Date: Tue, 12 Dec 2023 16:48:13 +0000
Subject: [PATCH 180/213] atomic_set_in_librdkafka: update librdkafka submodule

---
 contrib/librdkafka | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/librdkafka b/contrib/librdkafka
index 6f3b483426a..2d2aab6f5b7 160000
--- a/contrib/librdkafka
+++ b/contrib/librdkafka
@@ -1 +1 @@
-Subproject commit 6f3b483426a8c8ec950e27e446bec175cf8b553f
+Subproject commit 2d2aab6f5b79db1cfca15d7bf0dee75d00d82082

From 64d7abde099bfc516e0fb630b581b19c3a548279 Mon Sep 17 00:00:00 2001
From: Anton Popov <anton@clickhouse.com>
Date: Tue, 12 Dec 2023 16:51:23 +0000
Subject: [PATCH 181/213] fix result of external aggregation in case of
 partially materialized projection

---
 .../Transforms/AggregatingTransform.cpp       | 22 ++++---
 ...projections_external_aggregation.reference | 41 ++++++++++++
 ...02941_projections_external_aggregation.sql | 66 +++++++++++++++++++
 3 files changed, 121 insertions(+), 8 deletions(-)
 create mode 100644 tests/queries/0_stateless/02941_projections_external_aggregation.reference
 create mode 100644 tests/queries/0_stateless/02941_projections_external_aggregation.sql

diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp
index bf475c57d36..1f52ed97491 100644
--- a/src/Processors/Transforms/AggregatingTransform.cpp
+++ b/src/Processors/Transforms/AggregatingTransform.cpp
@@ -726,8 +726,11 @@ void AggregatingTransform::initGenerate()
             auto prepared_data = params->aggregator.prepareVariantsToMerge(many_data->variants);
             Pipes pipes;
             for (auto & variant : prepared_data)
+            {
                 /// Converts hash tables to blocks with data (finalized or not).
                 pipes.emplace_back(std::make_shared<ConvertingAggregatedToChunksSource>(params, variant));
+            }
+
             Pipe pipe = Pipe::unitePipes(std::move(pipes));
             if (!pipe.empty())
             {
@@ -781,21 +784,23 @@ void AggregatingTransform::initGenerate()
             }
         }
 
-        const auto & tmp_data = params->aggregator.getTemporaryData();
+        size_t num_streams = 0;
+        size_t compressed_size = 0;
+        size_t uncompressed_size = 0;
 
-        Pipe pipe;
+        Pipes pipes;
+        /// Merge external data from all aggregators used in query.
+        for (const auto & aggregator : *params->aggregator_list_ptr)
         {
-            Pipes pipes;
-
+            const auto & tmp_data = aggregator.getTemporaryData();
             for (auto * tmp_stream : tmp_data.getStreams())
                 pipes.emplace_back(Pipe(std::make_unique<SourceFromNativeStream>(tmp_stream)));
 
-            pipe = Pipe::unitePipes(std::move(pipes));
+            num_streams += tmp_data.getStreams().size();
+            compressed_size += tmp_data.getStat().compressed_size;
+            uncompressed_size += tmp_data.getStat().uncompressed_size;
         }
 
-        size_t num_streams = tmp_data.getStreams().size();
-        size_t compressed_size = tmp_data.getStat().compressed_size;
-        size_t uncompressed_size = tmp_data.getStat().uncompressed_size;
         LOG_DEBUG(
             log,
             "Will merge {} temporary files of size {} compressed, {} uncompressed.",
@@ -803,6 +808,7 @@ void AggregatingTransform::initGenerate()
             ReadableSize(compressed_size),
             ReadableSize(uncompressed_size));
 
+        auto pipe = Pipe::unitePipes(std::move(pipes));
         addMergingAggregatedMemoryEfficientTransform(pipe, params, temporary_data_merge_threads);
 
         processors = Pipe::detachProcessors(std::move(pipe));
diff --git a/tests/queries/0_stateless/02941_projections_external_aggregation.reference b/tests/queries/0_stateless/02941_projections_external_aggregation.reference
new file mode 100644
index 00000000000..4b1a62520cd
--- /dev/null
+++ b/tests/queries/0_stateless/02941_projections_external_aggregation.reference
@@ -0,0 +1,41 @@
+*** correct aggregation ***
+1	0	0	1249950000
+1	0	2	1250000000
+1	1	1	1249975000
+1	1	3	1250025000
+*** correct aggregation with projection ***
+1	0	0	1249950000
+1	0	2	1250000000
+1	1	1	1249975000
+1	1	3	1250025000
+*** optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***
+1	0	0	1249950000
+1	0	2	1250000000
+1	1	1	1249975000
+1	1	3	1250025000
+*** optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***
+1	0	0	1249950000
+1	0	2	1250000000
+1	1	1	1249975000
+1	1	3	1250025000
+*** after materialization ***
+*** correct aggregation ***
+1	0	0	1249950000
+1	0	2	1250000000
+1	1	1	1249975000
+1	1	3	1250025000
+*** correct aggregation with projection ***
+1	0	0	1249950000
+1	0	2	1250000000
+1	1	1	1249975000
+1	1	3	1250025000
+*** optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***
+1	0	0	1249950000
+1	0	2	1250000000
+1	1	1	1249975000
+1	1	3	1250025000
+*** optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***
+1	0	0	1249950000
+1	0	2	1250000000
+1	1	1	1249975000
+1	1	3	1250025000
diff --git a/tests/queries/0_stateless/02941_projections_external_aggregation.sql b/tests/queries/0_stateless/02941_projections_external_aggregation.sql
new file mode 100644
index 00000000000..5053773f142
--- /dev/null
+++ b/tests/queries/0_stateless/02941_projections_external_aggregation.sql
@@ -0,0 +1,66 @@
+DROP TABLE IF EXISTS t_proj_external;
+
+CREATE TABLE t_proj_external
+(
+    k1 UInt32,
+    k2 UInt32,
+    k3 UInt32,
+    value UInt32
+)
+ENGINE = MergeTree
+ORDER BY tuple();
+
+INSERT INTO t_proj_external SELECT 1, number%2, number%4, number FROM numbers(50000);
+
+SYSTEM STOP MERGES t_proj_external;
+
+ALTER TABLE t_proj_external ADD PROJECTION aaaa (
+    SELECT
+        k1,
+        k2,
+        k3,
+        sum(value)
+    GROUP BY k1, k2, k3
+);
+
+INSERT INTO t_proj_external SELECT 1, number%2, number%4, number FROM numbers(100000) LIMIT 50000, 100000;
+
+SELECT '*** correct aggregation ***';
+
+SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3 SETTINGS optimize_use_projections = 0;
+
+SELECT '*** correct aggregation with projection ***';
+
+SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3;
+
+SELECT '*** optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***';
+
+SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3 SETTINGS optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1;
+
+SELECT '*** optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***';
+
+SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3 SETTINGS optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1;
+
+SYSTEM START MERGES t_proj_external;
+
+ALTER TABLE t_proj_external MATERIALIZE PROJECTION aaaa SETTINGS mutations_sync = 2;
+
+SELECT '*** after materialization ***';
+
+SELECT '*** correct aggregation ***';
+
+SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3 SETTINGS optimize_use_projections = 0;
+
+SELECT '*** correct aggregation with projection ***';
+
+SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3;
+
+SELECT '*** optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***';
+
+SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3 SETTINGS optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1;
+
+SELECT '*** optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***';
+
+SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3 SETTINGS optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1;
+
+DROP TABLE IF EXISTS t_proj_external;

From e4e97471a6b2a6b7617473c7be62494d6098f0d6 Mon Sep 17 00:00:00 2001
From: Anton Popov <anton@clickhouse.com>
Date: Tue, 12 Dec 2023 17:31:56 +0000
Subject: [PATCH 182/213] fix totals in aggregation functions with Map
 combinator

---
 .../AggregateFunctionSumMap.cpp               |  11 +-
 .../02480_max_map_null_totals.reference       | 108 +++++++++---------
 .../0_stateless/02480_max_map_null_totals.sql |  54 ++++-----
 3 files changed, 91 insertions(+), 82 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/src/AggregateFunctions/AggregateFunctionSumMap.cpp
index 04bc908396a..9f0873a6c9c 100644
--- a/src/AggregateFunctions/AggregateFunctionSumMap.cpp
+++ b/src/AggregateFunctions/AggregateFunctionSumMap.cpp
@@ -254,11 +254,20 @@ public:
             if (it != merged_maps.end())
             {
                 for (size_t col = 0; col < values_types.size(); ++col)
+                {
                     if (!elem.second[col].isNull())
-                        applyVisitor(Visitor(elem.second[col]), it->second[col]);
+                    {
+                        if (it->second[col].isNull())
+                            it->second[col] = elem.second[col];
+                        else
+                            applyVisitor(Visitor(elem.second[col]), it->second[col]);
+                    }
+                }
             }
             else
+            {
                 merged_maps[elem.first] = elem.second;
+            }
         }
     }
 
diff --git a/tests/queries/0_stateless/02480_max_map_null_totals.reference b/tests/queries/0_stateless/02480_max_map_null_totals.reference
index 5cc9b5a495f..8fa02ad2a39 100644
--- a/tests/queries/0_stateless/02480_max_map_null_totals.reference
+++ b/tests/queries/0_stateless/02480_max_map_null_totals.reference
@@ -1,119 +1,119 @@
 ([-1,0],[0,0])
-([1,2],[0,2])
 ([0,1],[0,1])
+([1,2],[0,2])
 
-([-1,0,1,2],[0,0,0,2])
+([-1,0,1,2],[0,0,1,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
 ([0,1],[0,1])
+([1,2],[0,2])
 
-([-1,0,1,2],[0,0,0,2])
+([-1,0,1,2],[0,0,1,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([0],[0])
-([2],[2])
 ([1],[1])
+([2],[2])
 
-([0,2],[0,2])
+([0,1,2],[0,1,2])
 ([0],[0])
-([2],[2])
+([0,1,2],[0,1,2])
 ([1],[1])
-([0,2],[0,2])
+([2],[2])
 ([0],[0])
-([2],[2])
+([0,1,2],[0,1,2])
 ([1],[1])
-([0,2],[0,2])
+([2],[2])
 -
 ([-1,0],[0,0])
-([1,2],[0,2])
 ([0,1],[0,1])
+([1,2],[0,2])
 
-([-1,0,1,2],[0,0,0,2])
+([-1,0,1,2],[0,0,1,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
 ([0,1],[0,1])
+([1,2],[0,2])
 
-([-1,0,1,2],[0,0,0,2])
+([-1,0,1,2],[0,0,1,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([0],[0])
-([2],[2])
 ([1],[1])
+([2],[2])
 
-([0,2],[0,2])
+([0,1,2],[0,1,2])
 ([0],[0])
-([2],[2])
+([0,1,2],[0,1,2])
 ([1],[1])
-([0,2],[0,2])
+([2],[2])
 ([0],[0])
-([2],[2])
+([0,1,2],[0,1,2])
 ([1],[1])
-([0,2],[0,2])
+([2],[2])
 -
 ([-1,0],[0,0])
-([1,2],[0,2])
 ([0,1],[0,1])
+([1,2],[0,2])
 
-([-1,0,1,2],[0,0,0,2])
+([-1,0,1,2],[0,0,1,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
 ([0,1],[0,1])
+([1,2],[0,2])
 
-([-1,0,1,2],[0,0,0,2])
+([-1,0,1,2],[0,0,1,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([-1,0],[0,0])
-([1,2],[0,2])
+([-1,0,1,2],[0,0,1,2])
 ([0,1],[0,1])
-([-1,0,1,2],[0,0,0,2])
+([1,2],[0,2])
 ([0],[0])
-([2],[2])
 ([1],[1])
+([2],[2])
 
-([0,2],[0,2])
+([0,1,2],[0,1,2])
 ([0],[0])
-([2],[2])
+([0,1,2],[0,1,2])
 ([1],[1])
-([0,2],[0,2])
+([2],[2])
 ([0],[0])
-([2],[2])
+([0,1,2],[0,1,2])
 ([1],[1])
-([0,2],[0,2])
+([2],[2])
diff --git a/tests/queries/0_stateless/02480_max_map_null_totals.sql b/tests/queries/0_stateless/02480_max_map_null_totals.sql
index 81e2a5c4243..2c970e25fd5 100644
--- a/tests/queries/0_stateless/02480_max_map_null_totals.sql
+++ b/tests/queries/0_stateless/02480_max_map_null_totals.sql
@@ -1,39 +1,39 @@
-SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS;
-SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP;
-SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE;
+SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number;
+SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number;
+SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number;
 
-SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS;
-SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP;
-SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE;
+SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number;
+SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number;
+SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number;
 
-SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS;
-SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP;
-SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE;
+SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number;
+SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number;
+SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number;
 
 SELECT '-';
 
-SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS;
-SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP;
-SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE;
+SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number;
+SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number;
+SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number;
 
-SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS;
-SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP;
-SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE;
+SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number;
+SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number;
+SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number;
 
-SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS;
-SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP;
-SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE;
+SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number;
+SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number;
+SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number;
 
 SELECT '-';
 
-SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS;
-SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP;
-SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE;
+SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number;
+SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number;
+SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number;
 
-SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS;
-SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP;
-SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE;
+SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number;
+SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number;
+SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number;
 
-SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS;
-SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP;
-SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE;
+SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number;
+SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number;
+SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number;
\ No newline at end of file

From 028763def5313debef322ffabaedbb4c3a9cdcd6 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Tue, 12 Dec 2023 18:53:52 +0100
Subject: [PATCH 183/213] Simplify logic in
 BackupCoordinationStageSync::readCurrentState() and return earlier from the
 cycle on a connection problem.

---
 src/Backups/BackupCoordinationStageSync.cpp | 56 ++++++++++++---------
 1 file changed, 33 insertions(+), 23 deletions(-)

diff --git a/src/Backups/BackupCoordinationStageSync.cpp b/src/Backups/BackupCoordinationStageSync.cpp
index e4dac7dbbe9..cedcecfd35c 100644
--- a/src/Backups/BackupCoordinationStageSync.cpp
+++ b/src/Backups/BackupCoordinationStageSync.cpp
@@ -100,19 +100,19 @@ Strings BackupCoordinationStageSync::waitFor(const Strings & all_hosts, const St
 
 namespace
 {
-    struct UnreadyHostState
+    struct UnreadyHost
     {
+        String host;
         bool started = false;
-        bool alive = false;
     };
 }
 
 struct BackupCoordinationStageSync::State
 {
-    Strings results;
-    std::map<String, UnreadyHostState> unready_hosts;
+    std::optional<Strings> results;
     std::optional<std::pair<String, Exception>> error;
     std::optional<String> disconnected_host;
+    std::optional<UnreadyHost> unready_host;
 };
 
 BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState(
@@ -137,39 +137,45 @@ BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState
         return state;
     }
 
+    std::optional<UnreadyHost> unready_host;
+
     for (const auto & host : all_hosts)
     {
         if (!zk_nodes_set.contains("current|" + host + "|" + stage_to_wait))
         {
             const String started_node_name = "started|" + host;
             const String alive_node_name = "alive|" + host;
-            const String alive_node_path = zookeeper_path + "/" + alive_node_name;
 
-            UnreadyHostState unready_host_state;
-            unready_host_state.started = zk_nodes_set.contains(started_node_name);
-            unready_host_state.alive = zk_nodes_set.contains(alive_node_name);
-            state.unready_hosts.emplace(host, unready_host_state);
+            bool started = zk_nodes_set.contains(started_node_name);
+            bool alive = zk_nodes_set.contains(alive_node_name);
 
-            if (!unready_host_state.alive && !state.disconnected_host)
+            if (!alive)
             {
                 /// If the "alive" node doesn't exist then we don't have connection to the corresponding host.
                 /// This node is ephemeral so probably it will be recreated soon. We use zookeeper retries to wait.
                 /// In worst case when we won't manage to see the alive node for a long time we will just abort the backup.
-                state.disconnected_host = host;
                 String message;
-                if (unready_host_state.started)
+                if (started)
                     message = fmt::format("Lost connection to host {}", host);
                 else
                     message = fmt::format("No connection to host {} yet", host);
                 if (!retries_ctl.isLastRetry())
                     message += ", will retry";
                 retries_ctl.setUserError(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, message);
+                state.disconnected_host = host;
+                return state;
             }
+
+            if (!unready_host)
+                unready_host.emplace(UnreadyHost{.host = host, .started = started});
         }
     }
 
-    if (state.disconnected_host || !state.unready_hosts.empty())
+    if (unready_host)
+    {
+        state.unready_host = std::move(unready_host);
         return state;
+    }
 
     Strings results;
     for (const auto & host : all_hosts)
@@ -215,12 +221,16 @@ Strings BackupCoordinationStageSync::waitImpl(
         }
 
         /// Analyze the current state of zk nodes.
-        if (state.error || state.disconnected_host || state.unready_hosts.empty())
-            break; /// Error happened or everything is ready.
+        chassert(state.results || state.error || state.disconnected_host || state.unready_host);
 
-        /// Log that we will wait
-        const auto & unready_host = state.unready_hosts.begin()->first;
-        LOG_INFO(log, "Waiting on ZooKeeper watch for any node to be changed (currently waiting for host {})", unready_host);
+        if (state.results || state.error || state.disconnected_host)
+            break; /// Everything is ready or error happened.
+
+        /// Log what we will wait.
+        const auto & unready_host = *state.unready_host;
+        LOG_INFO(log, "Waiting on ZooKeeper watch for any node to be changed (currently waiting for host {}{})",
+                 unready_host.host,
+                 (!unready_host.started ? " which didn't start the operation yet" : ""));
 
         /// Wait until `watch_callback` is called by ZooKeeper meaning that zk nodes have changed.
         {
@@ -247,19 +257,19 @@ Strings BackupCoordinationStageSync::waitImpl(
         throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "No connection to host {}", *state.disconnected_host);
 
     /// Something's unready, timeout is probably not enough.
-    if (!state.unready_hosts.empty())
+    if (state.unready_host)
     {
-        const auto & [unready_host, unready_host_state] = *state.unready_hosts.begin();
+        const auto & unready_host = *state.unready_host;
         throw Exception(
             ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
             "Waited for host {} too long (> {}){}",
-            unready_host,
+            unready_host.host,
             to_string(*timeout),
-            unready_host_state.started ? "" : ": Operation didn't start");
+            unready_host.started ? "" : ": Operation didn't start");
     }
 
     LOG_TRACE(log, "Everything is Ok. All hosts achieved stage {}", stage_to_wait);
-    return state.results;
+    return std::move(*state.results);
 }
 
 }

From 49aad9c88e9ce4aea771b28a1fa8a4816cb481a4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 19:33:08 +0100
Subject: [PATCH 184/213] Maybe better

---
 docker/test/clickbench/run.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index 5d2312c22c5..255ff46b0bc 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -19,9 +19,9 @@ echo "
 profiles:
     default:
         allow_introspection_functions: 1
-" > /etc/clickhouse-server/allow_introspection_functions.yaml
+" > /etc/clickhouse-server/users.d/allow_introspection_functions.yaml
 
-config_logs_export_cluster /etc/clickhouse-server/users.d/system_logs_export.yaml
+config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
 
 clickhouse start
 

From 4c1860b9b4e499c736f8e85cef98afe27e35db65 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 21:21:58 +0100
Subject: [PATCH 185/213] Fix a mistake

---
 docker/test/base/setup_export_logs.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh
index 6e3721956c0..ea82e071112 100755
--- a/docker/test/base/setup_export_logs.sh
+++ b/docker/test/base/setup_export_logs.sh
@@ -21,7 +21,7 @@ EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "}
 
 # trace_log needs more columns for symbolization
 EXTRA_COLUMNS_TRACE_LOG="${EXTRA_COLUMNS} symbols Array(LowCardinality(String)), lines Array(LowCardinality(String)), "
-EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> toLowCardinality(demangle(addressToSymbol(x))), trace) AS symbols, arrayMap(x -> toLowCardinality(addressToLine(x)), trace) AS lines"
+EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> demangle(addressToSymbol(x)), trace)::Array(LowCardinality(String)) AS symbols, arrayMap(x -> addressToLine(x), trace)::Array(LowCardinality(String)) AS lines"
 
 
 function __set_connection_args

From 2099130bd2d66b8f2d9b87e27c833fdddaebc723 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 21:28:28 +0100
Subject: [PATCH 186/213] Enable text_log

---
 docker/test/clickbench/run.sh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index 255ff46b0bc..921d2023fd7 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -21,6 +21,11 @@ profiles:
         allow_introspection_functions: 1
 " > /etc/clickhouse-server/users.d/allow_introspection_functions.yaml
 
+# Enable text_log
+echo "
+text_log:
+" > /etc/clickhouse-server/config.d/text_log.yaml
+
 config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
 
 clickhouse start

From d51aaddf12119e45525a12112557c0595422a2b3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 22:15:19 +0100
Subject: [PATCH 187/213] Use the local region

---
 docker/test/clickbench/create.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/clickbench/create.sql b/docker/test/clickbench/create.sql
index 620bdf09331..a57faf35837 100644
--- a/docker/test/clickbench/create.sql
+++ b/docker/test/clickbench/create.sql
@@ -109,4 +109,4 @@ ATTACH TABLE hits UUID 'c449dfbf-ba06-4d13-abec-8396559eb955'
 )
 ENGINE = MergeTree
 SETTINGS disk = disk(type = cache, path = '/dev/shm/clickhouse/', max_size = '16G',
-         disk = disk(type = web, endpoint = 'https://clickhouse-public-datasets.s3.amazonaws.com/web/'));
+         disk = disk(type = web, endpoint = 'https://clickhouse-datasets.s3.amazonaws.com/web/'));

From 7f4a028196e53dca878ecda6c4449730891572b3 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Tue, 12 Dec 2023 21:26:58 +0000
Subject: [PATCH 188/213] Test and fix

---
 .../ParallelReplicasReadingCoordinator.cpp    |  13 +-
 .../__init__.py                               |   0
 .../configs/remote_servers.xml                |  22 ++++
 .../test.py                                   | 122 ++++++++++++++++++
 4 files changed, 154 insertions(+), 3 deletions(-)
 create mode 100644 tests/integration/test_parallel_replicas_working_set/__init__.py
 create mode 100644 tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml
 create mode 100644 tests/integration/test_parallel_replicas_working_set/test.py

diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
index d81f5dd41ce..c6edb1049f4 100644
--- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
+++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
@@ -164,11 +164,11 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc
     for (auto && part_ranges: announcement.description)
     {
         Part part{.description = std::move(part_ranges), .replicas = {announcement.replica_num}};
+        const MergeTreePartInfo & announced_part = part.description.info;
 
         auto it = std::lower_bound(cbegin(all_parts_to_read), cend(all_parts_to_read), part);
         if (it != all_parts_to_read.cend())
         {
-            const MergeTreePartInfo & announced_part = part.description.info;
             const MergeTreePartInfo & found_part = it->description.info;
             if (found_part == announced_part)
             {
@@ -183,13 +183,20 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc
                 bool is_disjoint = found_part.isDisjoint(announced_part);
                 if (it != all_parts_to_read.cbegin() && is_disjoint)
                 {
-                    const MergeTreePartInfo & lesser_part_info = (--it)->description.info;
-                    is_disjoint &= lesser_part_info.isDisjoint(announced_part);
+                    const MergeTreePartInfo & lesser_part = (--it)->description.info;
+                    is_disjoint &= lesser_part.isDisjoint(announced_part);
                 }
                 if (!is_disjoint)
                     continue;
             }
         }
+        else if (!all_parts_to_read.empty())
+        {
+            /// the announced part is greatest - check if it's disjoint with lesser part
+            const MergeTreePartInfo & lesser_part = all_parts_to_read.crbegin()->description.info;
+            if (!lesser_part.isDisjoint(announced_part))
+                continue;
+        }
 
         auto [insert_it, _] = all_parts_to_read.emplace(std::move(part));
         parts_diff.push_back(insert_it);
diff --git a/tests/integration/test_parallel_replicas_working_set/__init__.py b/tests/integration/test_parallel_replicas_working_set/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml
new file mode 100644
index 00000000000..02a315479f8
--- /dev/null
+++ b/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml
@@ -0,0 +1,22 @@
+<clickhouse>
+    <remote_servers>
+        <test_single_shard_multiple_replicas>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>n1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>n2</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>n3</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_single_shard_multiple_replicas>
+    </remote_servers>
+</clickhouse>
+
diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py
new file mode 100644
index 00000000000..7b93c2fcf4b
--- /dev/null
+++ b/tests/integration/test_parallel_replicas_working_set/test.py
@@ -0,0 +1,122 @@
+import pytest
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+
+nodes = [
+    cluster.add_instance(
+        f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
+    )
+    for i in (1, 2, 3)
+]
+
+
+@pytest.fixture(scope="module", autouse=True)
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def create_tables(cluster, table_name, node_with_covering_part):
+
+    # create replicated tables
+    for node in nodes:
+        node.query(f"DROP TABLE IF EXISTS {table_name} SYNC")
+
+    nodes[0].query(
+        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1')
+            ORDER BY (key)"""
+    )
+    nodes[1].query(
+        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2')
+            ORDER BY (key)"""
+    )
+    nodes[2].query(
+        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3')
+            ORDER BY (key)"""
+    )
+    # stop merges
+    for i in (0, 1, 2):
+        if i != node_with_covering_part:
+            nodes[i].query(f"system stop fetches {table_name}")
+
+    # populate data, equal number of rows for each replica
+    nodes[0].query(
+        f"INSERT INTO {table_name} SELECT number, number FROM numbers(10)",
+    )
+    nodes[0].query(
+        f"INSERT INTO {table_name} SELECT number, number FROM numbers(10, 10)"
+    )
+    nodes[1].query(
+        f"INSERT INTO {table_name} SELECT number, number FROM numbers(20, 10)"
+    )
+    nodes[1].query(
+        f"INSERT INTO {table_name} SELECT number, number FROM numbers(30, 10)"
+    )
+    nodes[2].query(
+        f"INSERT INTO {table_name} SELECT number, number FROM numbers(40, 10)"
+    )
+    nodes[2].query(
+        f"INSERT INTO {table_name} SELECT number, number FROM numbers(50, 10)"
+    )
+    nodes[node_with_covering_part].query(f"system sync replica {table_name}")
+    nodes[node_with_covering_part].query(f"optimize table {table_name}")
+
+    # check we have expected set of parts
+    expected_active_parts = ""
+    if node_with_covering_part == 0:
+        expected_active_parts = "all_0_5_1\nall_2_2_0\nall_3_3_0\nall_4_4_0\nall_5_5_0\n"
+
+    if node_with_covering_part == 1:
+        expected_active_parts = "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_4_4_0\nall_5_5_0\n"
+
+    if node_with_covering_part == 2:
+        expected_active_parts = "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_2_2_0\nall_3_3_0\n"
+
+    assert (nodes[0].query(f"select distinct name from clusterAllReplicas({cluster}, system.parts) where table='{table_name}' and active order by name") == expected_active_parts)
+
+
+
+@pytest.mark.parametrize("node_with_covering_part", [0, 1, 2])
+def test_covering_part_in_announcement(start_cluster, node_with_covering_part):
+    """create and populate table in special way (see create_table()),
+       node_with_covering_part contains all parts merged into one,
+       other nodes contain only parts which are result of insert via the node
+    """
+
+    cluster = "test_single_shard_multiple_replicas"
+    table_name = "test_table"
+    create_tables(cluster, table_name, node_with_covering_part)
+
+    expected_full_result = "60\t0\t59\t1770\n"
+    expected_results = {expected_full_result}
+    if node_with_covering_part == 0:
+         expected_results.add("40\t20\t59\t1580\n")
+    if node_with_covering_part == 1:
+         expected_results.add("40\t0\t59\t1180\n")
+    if node_with_covering_part == 2:
+         expected_results.add("40\t0\t39\t780\n")
+
+    # parallel replicas
+    result = nodes[0].query(
+            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}",
+            settings={
+                "allow_experimental_parallel_reading_from_replicas": 2,
+                "prefer_localhost_replica": 0,
+                "max_parallel_replicas": 3,
+                "use_hedged_requests": 0,
+                "cluster_for_parallel_replicas": cluster
+            },
+        )
+    assert(result in expected_results)
+
+    # w/o parallel replicas
+    assert (
+        nodes[node_with_covering_part].query(
+            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}"
+        )
+        == expected_full_result
+    )

From 3333a7f2194a7699cdf30f302dc1b0426f9c026d Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Tue, 12 Dec 2023 21:39:08 +0000
Subject: [PATCH 189/213] Test cleanup

---
 .../integration/test_parallel_replicas_working_set/test.py  | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py
index 7b93c2fcf4b..747ad7ec89f 100644
--- a/tests/integration/test_parallel_replicas_working_set/test.py
+++ b/tests/integration/test_parallel_replicas_working_set/test.py
@@ -79,7 +79,6 @@ def create_tables(cluster, table_name, node_with_covering_part):
     assert (nodes[0].query(f"select distinct name from clusterAllReplicas({cluster}, system.parts) where table='{table_name}' and active order by name") == expected_active_parts)
 
 
-
 @pytest.mark.parametrize("node_with_covering_part", [0, 1, 2])
 def test_covering_part_in_announcement(start_cluster, node_with_covering_part):
     """create and populate table in special way (see create_table()),
@@ -116,7 +115,10 @@ def test_covering_part_in_announcement(start_cluster, node_with_covering_part):
     # w/o parallel replicas
     assert (
         nodes[node_with_covering_part].query(
-            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}"
+            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}",
+            settings={
+                "allow_experimental_parallel_reading_from_replicas": 0,
+            },
         )
         == expected_full_result
     )

From ea86b33d3bb498e989dbdee1b43b892ee077cf4d Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Tue, 12 Dec 2023 21:59:08 +0000
Subject: [PATCH 190/213] Automatic style fix

---
 .../test.py                                   | 50 +++++++++++--------
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py
index 747ad7ec89f..04768694cc4 100644
--- a/tests/integration/test_parallel_replicas_working_set/test.py
+++ b/tests/integration/test_parallel_replicas_working_set/test.py
@@ -21,7 +21,6 @@ def start_cluster():
 
 
 def create_tables(cluster, table_name, node_with_covering_part):
-
     # create replicated tables
     for node in nodes:
         node.query(f"DROP TABLE IF EXISTS {table_name} SYNC")
@@ -68,22 +67,33 @@ def create_tables(cluster, table_name, node_with_covering_part):
     # check we have expected set of parts
     expected_active_parts = ""
     if node_with_covering_part == 0:
-        expected_active_parts = "all_0_5_1\nall_2_2_0\nall_3_3_0\nall_4_4_0\nall_5_5_0\n"
+        expected_active_parts = (
+            "all_0_5_1\nall_2_2_0\nall_3_3_0\nall_4_4_0\nall_5_5_0\n"
+        )
 
     if node_with_covering_part == 1:
-        expected_active_parts = "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_4_4_0\nall_5_5_0\n"
+        expected_active_parts = (
+            "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_4_4_0\nall_5_5_0\n"
+        )
 
     if node_with_covering_part == 2:
-        expected_active_parts = "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_2_2_0\nall_3_3_0\n"
+        expected_active_parts = (
+            "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_2_2_0\nall_3_3_0\n"
+        )
 
-    assert (nodes[0].query(f"select distinct name from clusterAllReplicas({cluster}, system.parts) where table='{table_name}' and active order by name") == expected_active_parts)
+    assert (
+        nodes[0].query(
+            f"select distinct name from clusterAllReplicas({cluster}, system.parts) where table='{table_name}' and active order by name"
+        )
+        == expected_active_parts
+    )
 
 
 @pytest.mark.parametrize("node_with_covering_part", [0, 1, 2])
 def test_covering_part_in_announcement(start_cluster, node_with_covering_part):
     """create and populate table in special way (see create_table()),
-       node_with_covering_part contains all parts merged into one,
-       other nodes contain only parts which are result of insert via the node
+    node_with_covering_part contains all parts merged into one,
+    other nodes contain only parts which are result of insert via the node
     """
 
     cluster = "test_single_shard_multiple_replicas"
@@ -93,24 +103,24 @@ def test_covering_part_in_announcement(start_cluster, node_with_covering_part):
     expected_full_result = "60\t0\t59\t1770\n"
     expected_results = {expected_full_result}
     if node_with_covering_part == 0:
-         expected_results.add("40\t20\t59\t1580\n")
+        expected_results.add("40\t20\t59\t1580\n")
     if node_with_covering_part == 1:
-         expected_results.add("40\t0\t59\t1180\n")
+        expected_results.add("40\t0\t59\t1180\n")
     if node_with_covering_part == 2:
-         expected_results.add("40\t0\t39\t780\n")
+        expected_results.add("40\t0\t39\t780\n")
 
     # parallel replicas
     result = nodes[0].query(
-            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}",
-            settings={
-                "allow_experimental_parallel_reading_from_replicas": 2,
-                "prefer_localhost_replica": 0,
-                "max_parallel_replicas": 3,
-                "use_hedged_requests": 0,
-                "cluster_for_parallel_replicas": cluster
-            },
-        )
-    assert(result in expected_results)
+        f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}",
+        settings={
+            "allow_experimental_parallel_reading_from_replicas": 2,
+            "prefer_localhost_replica": 0,
+            "max_parallel_replicas": 3,
+            "use_hedged_requests": 0,
+            "cluster_for_parallel_replicas": cluster,
+        },
+    )
+    assert result in expected_results
 
     # w/o parallel replicas
     assert (

From 2043791ed76d040c8f05f5ad856bb599512da15c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 12 Dec 2023 23:37:55 +0100
Subject: [PATCH 191/213] Fix typo

---
 docker/test/stateful/s3downloader | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/test/stateful/s3downloader b/docker/test/stateful/s3downloader
index 96f2aa96dd5..77601fb5af6 100755
--- a/docker/test/stateful/s3downloader
+++ b/docker/test/stateful/s3downloader
@@ -30,7 +30,7 @@ def build_url(base_url, dataset):
     return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])
 
 
-def dowload_with_progress(url, path):
+def download_with_progress(url, path):
     logging.info("Downloading from %s to temp path %s", url, path)
     for i in range(RETRIES_COUNT):
         try:
@@ -110,7 +110,7 @@ if __name__ == "__main__":
         temp_archive_path = _get_temp_file_name()
         try:
             download_url_for_dataset = build_url(args.url_prefix, dataset)
-            dowload_with_progress(download_url_for_dataset, temp_archive_path)
+            download_with_progress(download_url_for_dataset, temp_archive_path)
             unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path)
         except Exception as ex:
             logging.info("Some exception occured %s", str(ex))

From f11b90e7bbec13ba27989442e359f818d8e85088 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Wed, 13 Dec 2023 00:10:58 +0100
Subject: [PATCH 192/213] Allow buckets without List access

---
 src/Storages/StorageMergeTree.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index e9a0dd5fbf3..16f4122d605 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -829,8 +829,13 @@ void StorageMergeTree::loadDeduplicationLog()
 
     auto disk = getDisks()[0];
     std::string path = fs::path(relative_data_path) / "deduplication_logs";
-    deduplication_log = std::make_unique<MergeTreeDeduplicationLog>(path, settings->non_replicated_deduplication_window, format_version, disk);
-    deduplication_log->load();
+
+    /// If either there is already a deduplication log, or we will be able to use it.
+    if (disk->exists(path) || !disk->isReadOnly())
+    {
+        deduplication_log = std::make_unique<MergeTreeDeduplicationLog>(path, settings->non_replicated_deduplication_window, format_version, disk);
+        deduplication_log->load();
+    }
 }
 
 void StorageMergeTree::loadMutations()

From 9f5299e118fc536f0ec9deb224c6ed6028362743 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Wed, 13 Dec 2023 00:11:10 +0100
Subject: [PATCH 193/213] Use a new bucket

---
 docker/test/clickbench/create.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/clickbench/create.sql b/docker/test/clickbench/create.sql
index a57faf35837..9f18a47474b 100644
--- a/docker/test/clickbench/create.sql
+++ b/docker/test/clickbench/create.sql
@@ -109,4 +109,4 @@ ATTACH TABLE hits UUID 'c449dfbf-ba06-4d13-abec-8396559eb955'
 )
 ENGINE = MergeTree
 SETTINGS disk = disk(type = cache, path = '/dev/shm/clickhouse/', max_size = '16G',
-         disk = disk(type = web, endpoint = 'https://clickhouse-datasets.s3.amazonaws.com/web/'));
+         disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/'));

From bb22ce6ec87063086ef5f6525d7d40dd72cfe88b Mon Sep 17 00:00:00 2001
From: Mikhail Koviazin <mikhail.koviazin@aiven.io>
Date: Wed, 13 Dec 2023 07:17:56 +0000
Subject: [PATCH 194/213] fix clickhouse-client invocation in
 02327_capnproto_protobuf_empty_messages

The test relies on `clickhouse-client` being in `$PATH`, which is not a safe
assumption. This commit makes it use `$CLICKHOUSE_CLIENT_BINARY` instead.
---
 .../0_stateless/02327_capnproto_protobuf_empty_messages.sh    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh
index 69e65112305..dfc0dedeaf1 100755
--- a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh
+++ b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh
@@ -5,10 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 touch $USER_FILES_PATH/data.capnp
 
-SCHEMADIR=$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)")
+SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)")
 CLIENT_SCHEMADIR=$CURDIR/format_schemas
 SERVER_SCHEMADIR=test_02327
 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR

From 5abeadf20fbe3f3697d60504ae6ae53b9f653900 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Wed, 13 Dec 2023 08:53:04 +0000
Subject: [PATCH 195/213] Fix: stop merges, otherwise test can be flaky

---
 tests/integration/test_parallel_replicas_working_set/test.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py
index 04768694cc4..6292d33ed3e 100644
--- a/tests/integration/test_parallel_replicas_working_set/test.py
+++ b/tests/integration/test_parallel_replicas_working_set/test.py
@@ -37,10 +37,12 @@ def create_tables(cluster, table_name, node_with_covering_part):
         f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3')
             ORDER BY (key)"""
     )
-    # stop merges
+    # stop merges to keep original parts
+    # stop fetches to keep only parts created on the nodes
     for i in (0, 1, 2):
         if i != node_with_covering_part:
             nodes[i].query(f"system stop fetches {table_name}")
+            nodes[i].query(f"system stop merges {table_name}")
 
     # populate data, equal number of rows for each replica
     nodes[0].query(

From 7762beaf6cd64f2553e81db5f7b1e5ba4ea4d8bd Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Wed, 13 Dec 2023 11:23:53 +0000
Subject: [PATCH 196/213] Fix: w/o replicas sync query result can vary

---
 .../test_parallel_replicas_over_distributed/test.py  | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/integration/test_parallel_replicas_over_distributed/test.py b/tests/integration/test_parallel_replicas_over_distributed/test.py
index ecfc2ddea63..aecc0fcdcb8 100644
--- a/tests/integration/test_parallel_replicas_over_distributed/test.py
+++ b/tests/integration/test_parallel_replicas_over_distributed/test.py
@@ -129,6 +129,9 @@ def test_parallel_replicas_over_distributed(
     node = nodes[0]
     expected_result = f"6003\t-1999\t1999\t3\n"
 
+    # sync all replicas to get consistent result
+    node.query(f"SYSTEM SYNC REPLICA ON CLUSTER {cluster} {table_name}")
+
     # parallel replicas
     assert (
         node.query(
@@ -143,11 +146,12 @@ def test_parallel_replicas_over_distributed(
         == expected_result
     )
 
-    # sync all replicas to get consistent result by next distributed query
-    node.query(f"SYSTEM SYNC REPLICA ON CLUSTER {cluster} {table_name}")
-
     # w/o parallel replicas
     assert (
-        node.query(f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d")
+        node.query(f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d",
+            settings={
+                "allow_experimental_parallel_reading_from_replicas": 0,
+           }
+        )
         == expected_result
     )

From 7d9e9fd42eab9ae926d7bbd748627e8272c6afec Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Wed, 13 Dec 2023 11:38:41 +0000
Subject: [PATCH 197/213] Automatic style fix

---
 .../test_parallel_replicas_over_distributed/test.py          | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_parallel_replicas_over_distributed/test.py b/tests/integration/test_parallel_replicas_over_distributed/test.py
index aecc0fcdcb8..ebff0309a4f 100644
--- a/tests/integration/test_parallel_replicas_over_distributed/test.py
+++ b/tests/integration/test_parallel_replicas_over_distributed/test.py
@@ -148,10 +148,11 @@ def test_parallel_replicas_over_distributed(
 
     # w/o parallel replicas
     assert (
-        node.query(f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d",
+        node.query(
+            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d",
             settings={
                 "allow_experimental_parallel_reading_from_replicas": 0,
-           }
+            },
         )
         == expected_result
     )

From 8c2137e0c62721d6867cc252d9f2985e6b9d5339 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 13 Dec 2023 13:09:36 +0100
Subject: [PATCH 198/213] Revert "Merge pull request #57741 from
 ucasfl/negtive-position"

This reverts commit 3d846800e0bdd94916ed8b8faf1c1bc7868ca933, reversing
changes made to b31b4c932f78c8ea4f65657f88d65b494de15db0.
---
 src/Analyzer/Passes/QueryAnalysisPass.cpp     | 27 ++----
 .../replaceForPositionalArguments.cpp         | 25 +----
 .../0_stateless/01162_strange_mutations.sh    |  2 +-
 .../0_stateless/01798_having_push_down.sql    |  3 +-
 .../02006_test_positional_arguments.reference | 94 -------------------
 .../02006_test_positional_arguments.sql       | 21 -----
 .../02932_group_by_null_fuzzer.sql            |  1 -
 7 files changed, 14 insertions(+), 159 deletions(-)

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index a3b461f32ea..1e63d5ca8e4 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -2153,32 +2153,19 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_
             node_to_replace = &sort_node->getExpression();
 
         auto * constant_node = (*node_to_replace)->as<ConstantNode>();
-
-        if (!constant_node
-            || (constant_node->getValue().getType() != Field::Types::UInt64 && constant_node->getValue().getType() != Field::Types::Int64))
+        if (!constant_node || constant_node->getValue().getType() != Field::Types::UInt64)
             continue;
 
-        UInt64 pos;
-        if (constant_node->getValue().getType() == Field::Types::UInt64)
-        {
-            pos = constant_node->getValue().get<UInt64>();
-        }
-        else // Int64
-        {
-            auto value = constant_node->getValue().get<Int64>();
-            pos = value > 0 ? value : projection_nodes.size() + value + 1;
-        }
-
-
-        if (!pos || pos > projection_nodes.size())
-            throw Exception(
-                ErrorCodes::BAD_ARGUMENTS,
+        UInt64 positional_argument_number = constant_node->getValue().get<UInt64>();
+        if (positional_argument_number == 0 || positional_argument_number > projection_nodes.size())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                 "Positional argument number {} is out of bounds. Expected in range [1, {}]. In scope {}",
-                pos,
+                positional_argument_number,
                 projection_nodes.size(),
                 scope.scope_node->formatASTForErrorMessage());
 
-        *node_to_replace = projection_nodes[--pos];
+        --positional_argument_number;
+        *node_to_replace = projection_nodes[positional_argument_number];
     }
 }
 
diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp
index 8306da17f52..241dd7cf92c 100644
--- a/src/Interpreters/replaceForPositionalArguments.cpp
+++ b/src/Interpreters/replaceForPositionalArguments.cpp
@@ -27,29 +27,14 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel
         return false;
 
     auto which = ast_literal->value.getType();
-    if (which != Field::Types::UInt64 && which != Field::Types::Int64)
+    if (which != Field::Types::UInt64)
         return false;
 
-    UInt64 pos;
-
-    if (which == Field::Types::UInt64)
-    {
-        pos = ast_literal->value.get<UInt64>();
-    }
-    else if (which == Field::Types::Int64)
-    {
-        auto value = ast_literal->value.get<Int64>();
-        pos = value > 0 ? value : columns.size() + value + 1;
-    }
-    else
-    {
-        return false;
-    }
-
-
+    auto pos = ast_literal->value.get<UInt64>();
     if (!pos || pos > columns.size())
-        throw Exception(
-            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Positional argument out of bounds: {} (expected in range [1, {}]", pos, columns.size());
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Positional argument out of bounds: {} (expected in range [1, {}]",
+                        pos, columns.size());
 
     const auto & column = columns[--pos];
     if (typeid_cast<const ASTIdentifier *>(column.get()) || typeid_cast<const ASTLiteral *>(column.get()))
diff --git a/tests/queries/0_stateless/01162_strange_mutations.sh b/tests/queries/0_stateless/01162_strange_mutations.sh
index f6b31847c1e..eea9ea5f7e5 100755
--- a/tests/queries/0_stateless/01162_strange_mutations.sh
+++ b/tests/queries/0_stateless/01162_strange_mutations.sh
@@ -28,7 +28,7 @@ do
     $CLICKHOUSE_CLIENT -q "CREATE TABLE test ENGINE=$engine AS SELECT number + 100 AS n, 0 AS test FROM numbers(50)" 2>&1| grep -Ev "Removing leftovers from table|removed by another replica"
     $CLICKHOUSE_CLIENT -q "select count(), sum(n), sum(test) from test"
     if [[ $engine == *"ReplicatedMergeTree"* ]]; then
-        $CLICKHOUSE_CLIENT --enable_positional_arguments=0 -q "ALTER TABLE test
+        $CLICKHOUSE_CLIENT -q "ALTER TABLE test
             UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 'dummy')[n - 99] WHERE 1" 2>&1| grep -Fa "DB::Exception: " | grep -Fv "statement with subquery may be nondeterministic"
         $CLICKHOUSE_CLIENT --allow_nondeterministic_mutations=1 --mutations_sync=1 -q "ALTER TABLE test
                     UPDATE test = (SELECT groupArray(id) FROM t1)[n - 99] WHERE 1"
diff --git a/tests/queries/0_stateless/01798_having_push_down.sql b/tests/queries/0_stateless/01798_having_push_down.sql
index c0c3447f5ab..b3a77c8f5b5 100644
--- a/tests/queries/0_stateless/01798_having_push_down.sql
+++ b/tests/queries/0_stateless/01798_having_push_down.sql
@@ -8,12 +8,11 @@ SELECT sum(c0 = 0), min(c0 + 1), sum(c0 + 2) FROM t_having
 GROUP BY c0 HAVING c0 = 0
 SETTINGS enable_optimize_predicate_expression=0;
 
-SET enable_positional_arguments=0;
-
 SELECT c0 + -1, sum(intDivOrZero(intDivOrZero(NULL, NULL), '2'), intDivOrZero(10000000000., intDivOrZero(intDivOrZero(intDivOrZero(NULL, NULL), 10), NULL))) FROM t_having GROUP BY c0 = 2, c0 = 10, intDivOrZero(intDivOrZero(intDivOrZero(NULL, NULL), NULL), NULL), c0 HAVING c0 = 2 SETTINGS enable_optimize_predicate_expression = 0;
 
 SELECT sum(c0 + 257) FROM t_having GROUP BY c0 = -9223372036854775808, NULL, -2147483649, c0 HAVING c0 = -9223372036854775808 SETTINGS enable_optimize_predicate_expression = 0;
 
+SET enable_positional_arguments=0;
 SELECT c0 + -2, c0 + -9223372036854775807, c0 = NULL FROM t_having GROUP BY c0 = 0.9998999834060669, 1023, c0 HAVING c0 = 0.9998999834060669 SETTINGS enable_optimize_predicate_expression = 0;
 
 DROP TABLE t_having;
diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.reference b/tests/queries/0_stateless/02006_test_positional_arguments.reference
index 079bd071103..40100e8d5be 100644
--- a/tests/queries/0_stateless/02006_test_positional_arguments.reference
+++ b/tests/queries/0_stateless/02006_test_positional_arguments.reference
@@ -3,50 +3,18 @@ select x3, x2, x1 from test order by 1;
 1	100	100
 10	1	10
 100	10	1
-select x3, x2, x1 from test order by -3;
-1	100	100
-10	1	10
-100	10	1
 select x3, x2, x1 from test order by x3;
 1	100	100
 10	1	10
 100	10	1
-select x3, x2, x1 from test order by 3;
-100	10	1
-10	1	10
-1	100	100
-select x3, x2, x1 from test order by -1;
-100	10	1
-10	1	10
-1	100	100
-select x3, x2, x1 from test order by x1;
-100	10	1
-10	1	10
-1	100	100
 select x3, x2, x1 from test order by 1 desc;
 100	10	1
 10	1	10
 1	100	100
-select x3, x2, x1 from test order by -3 desc;
-100	10	1
-10	1	10
-1	100	100
 select x3, x2, x1 from test order by x3 desc;
 100	10	1
 10	1	10
 1	100	100
-select x3, x2, x1 from test order by 3 desc;
-1	100	100
-10	1	10
-100	10	1
-select x3, x2, x1 from test order by -1 desc;
-1	100	100
-10	1	10
-100	10	1
-select x3, x2, x1 from test order by x1 desc;
-1	100	100
-10	1	10
-100	10	1
 insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1);
 select x3, x2 from test group by x3, x2 order by x3;
 1	100
@@ -86,20 +54,6 @@ SELECT
     x1
 FROM test
 ORDER BY x3 + 1 ASC
-explain syntax select x3, x2, x1 from test order by -1;
-SELECT
-    x3,
-    x2,
-    x1
-FROM test
-ORDER BY x1 ASC
-explain syntax select x3 + 1, x2, x1 from test order by -1;
-SELECT
-    x3 + 1,
-    x2,
-    x1
-FROM test
-ORDER BY x1 ASC
 explain syntax select x3, x3 - x2, x2, x1 from test order by 2;
 SELECT
     x3,
@@ -108,14 +62,6 @@ SELECT
     x1
 FROM test
 ORDER BY x3 - x2 ASC
-explain syntax select x3, x3 - x2, x2, x1 from test order by -2;
-SELECT
-    x3,
-    x3 - x2,
-    x2,
-    x1
-FROM test
-ORDER BY x2 ASC
 explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by 2;
 SELECT
     x3,
@@ -123,28 +69,12 @@ SELECT
     x1 + x2
 FROM test
 ORDER BY if(x3 > 10, x3, x1 + x2) ASC
-explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by -2;
-SELECT
-    x3,
-    if(x3 > 10, x3, x1 + x2),
-    x1 + x2
-FROM test
-ORDER BY if(x3 > 10, x3, x1 + x2) ASC
 explain syntax select max(x1), x2 from test group by 2 order by 1, 2;
 SELECT
     max(x1),
     x2
 FROM test
 GROUP BY x2
-ORDER BY
-    max(x1) ASC,
-    x2 ASC
-explain syntax select max(x1), x2 from test group by -1 order by -2, -1;
-SELECT
-    max(x1),
-    x2
-FROM test
-GROUP BY x2
 ORDER BY
     max(x1) ASC,
     x2 ASC
@@ -153,34 +83,16 @@ SELECT
     1 + greatest(x1, 1),
     x2
 FROM test
-GROUP BY
-    1 + greatest(x1, 1),
-    x2
-explain syntax select 1 + greatest(x1, 1), x2 from test group by -2, -1;
-SELECT
-    1 + greatest(x1, 1),
-    x2
-FROM test
 GROUP BY
     1 + greatest(x1, 1),
     x2
 select max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 }
 select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 }
-select max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 }
-select 1 + max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 }
 explain syntax select x1 + x3, x3 from test group by 1, 2;
 SELECT
     x1 + x3,
     x3
 FROM test
-GROUP BY
-    x1 + x3,
-    x3
-explain syntax select x1 + x3, x3 from test group by -2, -1;
-SELECT
-    x1 + x3,
-    x3
-FROM test
 GROUP BY
     x1 + x3,
     x3
@@ -190,14 +102,8 @@ select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2,
 1	2	10	100
 10	20	1	10
 100	200	100	1
-select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, -1 desc, -2 asc;
-1	2	10	100
-10	20	1	10
-100	200	100	1
 select a, b, c, d, e, f  from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6 order by a;
 44	88	13	14	15	16
-select a, b, c, d, e, f  from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,-3,-2,-1 order by a;
-44	88	13	14	15	16
 explain syntax select plus(1, 1) as a group by a;
 SELECT 1 + 1 AS a
 GROUP BY a
diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.sql b/tests/queries/0_stateless/02006_test_positional_arguments.sql
index 6f427e0298d..159ad6bd427 100644
--- a/tests/queries/0_stateless/02006_test_positional_arguments.sql
+++ b/tests/queries/0_stateless/02006_test_positional_arguments.sql
@@ -9,21 +9,11 @@ insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1);
 
 -- { echo }
 select x3, x2, x1 from test order by 1;
-select x3, x2, x1 from test order by -3;
 select x3, x2, x1 from test order by x3;
 
-select x3, x2, x1 from test order by 3;
-select x3, x2, x1 from test order by -1;
-select x3, x2, x1 from test order by x1;
-
 select x3, x2, x1 from test order by 1 desc;
-select x3, x2, x1 from test order by -3 desc;
 select x3, x2, x1 from test order by x3 desc;
 
-select x3, x2, x1 from test order by 3 desc;
-select x3, x2, x1 from test order by -1 desc;
-select x3, x2, x1 from test order by x1 desc;
-
 insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1);
 select x3, x2 from test group by x3, x2 order by x3;
 select x3, x2 from test group by 1, 2 order by x3;
@@ -35,32 +25,21 @@ select x1, x2, x3 from test order by 3 limit 1 by 1;
 
 explain syntax select x3, x2, x1 from test order by 1;
 explain syntax select x3 + 1, x2, x1 from test order by 1;
-explain syntax select x3, x2, x1 from test order by -1;
-explain syntax select x3 + 1, x2, x1 from test order by -1;
 explain syntax select x3, x3 - x2, x2, x1 from test order by 2;
-explain syntax select x3, x3 - x2, x2, x1 from test order by -2;
 explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by 2;
-explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by -2;
 explain syntax select max(x1), x2 from test group by 2 order by 1, 2;
-explain syntax select max(x1), x2 from test group by -1 order by -2, -1;
 explain syntax select 1 + greatest(x1, 1), x2 from test group by 1, 2;
-explain syntax select 1 + greatest(x1, 1), x2 from test group by -2, -1;
 
 select max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 }
 select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 }
-select max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 }
-select 1 + max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 }
 
 explain syntax select x1 + x3, x3 from test group by 1, 2;
-explain syntax select x1 + x3, x3 from test group by -2, -1;
 
 create table test2(x1 Int, x2 Int, x3 Int) engine=Memory;
 insert into test2 values (1, 10, 100), (10, 1, 10), (100, 100, 1);
 select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, 4 desc, 3 asc;
-select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, -1 desc, -2 asc;
 
 select a, b, c, d, e, f  from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6 order by a;
-select a, b, c, d, e, f  from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,-3,-2,-1 order by a;
 
 explain syntax select plus(1, 1) as a group by a;
 select substr('aaaaaaaaaaaaaa', 8) as a  group by a order by a;
diff --git a/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql b/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql
index 603c7783ef8..0c28c120d40 100644
--- a/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql
+++ b/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql
@@ -1,6 +1,5 @@
 -- https://github.com/ClickHouse/ClickHouse/issues/43202
 -- Queries are generated by the fuzzer, so don't expect them to make sense
-SET enable_positional_arguments=0;
 SELECT NULL, '' FROM (SELECT toNullable(''), NULL AS key GROUP BY GROUPING SETS ((NULL))) AS s1 ALL LEFT JOIN (SELECT '' AS key, NULL AS value GROUP BY GROUPING SETS (('')) WITH TOTALS UNION ALL SELECT NULL AS key, toNullable(NULL) AS value GROUP BY '', NULL, '' WITH TOTALS) AS s2 USING (key);
 SELECT NULL GROUP BY NULL WITH TOTALS;
 SELECT 1048575, NULL, b FROM (SELECT '25.5' AS a, NULL, NULL AS b GROUP BY GROUPING SETS ((0.0001)) WITH TOTALS) AS js1 ANY RIGHT JOIN (SELECT NULL AS a, NULL AS b WHERE NULL GROUP BY NULL, -9223372036854775807 WITH CUBE WITH TOTALS UNION ALL SELECT NULL AS a, NULL AS b GROUP BY 1, '21474836.46' WITH TOTALS) AS js2 USING (a, b) ORDER BY nan DESC NULLS LAST, '9223372036854775807' DESC NULLS LAST, a ASC NULLS LAST;

From 090d412d7cc37104ba90355c880a357fbd34e091 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 13 Dec 2023 13:14:00 +0100
Subject: [PATCH 199/213] Add tests for 46628

---
 .../02943_positional_arguments_bugs.reference |  2 ++
 .../02943_positional_arguments_bugs.sql       | 23 +++++++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 tests/queries/0_stateless/02943_positional_arguments_bugs.reference
 create mode 100644 tests/queries/0_stateless/02943_positional_arguments_bugs.sql

diff --git a/tests/queries/0_stateless/02943_positional_arguments_bugs.reference b/tests/queries/0_stateless/02943_positional_arguments_bugs.reference
new file mode 100644
index 00000000000..702e1261186
--- /dev/null
+++ b/tests/queries/0_stateless/02943_positional_arguments_bugs.reference
@@ -0,0 +1,2 @@
+45	1
+processed	99	0
diff --git a/tests/queries/0_stateless/02943_positional_arguments_bugs.sql b/tests/queries/0_stateless/02943_positional_arguments_bugs.sql
new file mode 100644
index 00000000000..b8cf73da42d
--- /dev/null
+++ b/tests/queries/0_stateless/02943_positional_arguments_bugs.sql
@@ -0,0 +1,23 @@
+-- https://github.com/ClickHouse/ClickHouse/issues/46628
+DROP TABLE IF EXISTS t;
+CREATE TABLE t
+(
+    `n` int
+)
+    ENGINE = MergeTree
+        ORDER BY n AS
+SELECT *
+FROM numbers(10);
+
+SELECT
+    sum(n),
+    1 AS x
+FROM t
+GROUP BY x;
+
+SELECT
+    'processed' AS type,
+    max(number) AS max_date,
+    min(number) AS min_date
+FROM numbers(100)
+GROUP BY type;

From 2a0a5f755c166604ca67901559afa50261556222 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Wed, 13 Dec 2023 12:29:57 +0000
Subject: [PATCH 200/213] Comment about possible query results

---
 tests/integration/test_parallel_replicas_working_set/test.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py
index 6292d33ed3e..0ede9d9b1a5 100644
--- a/tests/integration/test_parallel_replicas_working_set/test.py
+++ b/tests/integration/test_parallel_replicas_working_set/test.py
@@ -102,8 +102,12 @@ def test_covering_part_in_announcement(start_cluster, node_with_covering_part):
     table_name = "test_table"
     create_tables(cluster, table_name, node_with_covering_part)
 
+    # query result can be one of the following outcomes
+    # (1) query result if parallel replicas working set contains all_0_5_1
     expected_full_result = "60\t0\t59\t1770\n"
     expected_results = {expected_full_result}
+
+    # (2) query result if parallel replicas working set DOESN'T contain all_0_5_1
     if node_with_covering_part == 0:
         expected_results.add("40\t20\t59\t1580\n")
     if node_with_covering_part == 1:

From 54abbf146d1d7dfcdab8da50a4292e2079d40bc8 Mon Sep 17 00:00:00 2001
From: Johnny <9611008+johnnymatthews@users.noreply.github.com>
Date: Wed, 13 Dec 2023 09:04:07 -0400
Subject: [PATCH 201/213] Update into-outfile.md

---
 docs/en/sql-reference/statements/select/into-outfile.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md
index 985f5e25b05..5b7196f13e3 100644
--- a/docs/en/sql-reference/statements/select/into-outfile.md
+++ b/docs/en/sql-reference/statements/select/into-outfile.md
@@ -12,7 +12,7 @@ Compressed files are supported. Compression type is detected by the extension of
 **Syntax**
 
 ```sql
-SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION type [LEVEL level]]
+SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [APPEND | TRUNCATE] [COMPRESSION type [LEVEL level]]
 ```
 
 `file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.

From c165be76abfb51a2dca9ee9a7baec9e46ce52d34 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <954088+devcrafter@users.noreply.github.com>
Date: Wed, 13 Dec 2023 14:42:06 +0100
Subject: [PATCH 202/213] Parallel replicas: friendly settings (#57542)

---
 docker/test/stateful/run.sh                   |  2 +-
 .../ClusterProxy/executeQuery.cpp             | 38 ++++++++++++++++++-
 src/Interpreters/ClusterProxy/executeQuery.h  |  3 +-
 src/Interpreters/Context.cpp                  |  2 +-
 src/Storages/StorageMergeTree.cpp             | 22 +++++------
 src/Storages/StorageReplicatedMergeTree.cpp   | 17 +++------
 src/Storages/StorageReplicatedMergeTree.h     |  4 +-
 .../test.py                                   |  1 -
 .../test.py                                   |  3 --
 .../test.py                                   |  1 -
 .../test.py                                   |  2 -
 ...arallel_reading_from_replicas_benchmark.sh |  1 -
 .../02731_parallel_replicas_join_subquery.sql |  1 -
 ...arallel_replicas_bug_chunkinfo_not_set.sql |  2 +-
 ...764_parallel_replicas_plain_merge_tree.sql |  2 +-
 ...02765_parallel_replicas_final_modifier.sql |  2 +-
 ...9_parallel_replicas_unavailable_shards.sql |  2 +-
 ...02771_parallel_replicas_analyzer.reference |  4 +-
 .../02771_parallel_replicas_analyzer.sql      |  3 +-
 ...lel_replicas_trivial_count_optimization.sh |  4 --
 ...84_parallel_replicas_automatic_decision.sh |  1 -
 ...rallel_replicas_automatic_decision_join.sh |  1 -
 ...02811_parallel_replicas_prewhere_count.sql |  1 -
 ...835_parallel_replicas_over_distributed.sql |  8 ++--
 .../02841_parallel_replicas_summary.sh        |  2 -
 .../02861_index_set_incorrect_args.sql        |  2 +-
 ...69_parallel_replicas_read_from_several.sql |  2 +-
 ...parallel_replicas_cluster_all_replicas.sql |  2 +-
 .../02875_parallel_replicas_remote.sql        |  2 +-
 .../02898_parallel_replicas_progress_bar.sql  |  2 +-
 .../02901_parallel_replicas_rollup.sh         |  2 -
 ...02935_parallel_replicas_settings.reference |  4 ++
 .../02935_parallel_replicas_settings.sql      | 35 +++++++++++++++++
 .../1_stateful/00177_memory_bound_merging.sh  |  6 +--
 34 files changed, 114 insertions(+), 72 deletions(-)
 create mode 100644 tests/queries/0_stateless/02935_parallel_replicas_settings.reference
 create mode 100644 tests/queries/0_stateless/02935_parallel_replicas_settings.sql

diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh
index a0def50bfb5..806b57c4616 100755
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@@ -151,7 +151,7 @@ function run_tests()
     set +e
 
     if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then
-        clickhouse-test --client="clickhouse-client --use_hedged_requests=0  --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \
+        clickhouse-test --client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \
             --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \
             -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
         "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 8a2f7e3205a..f3b7e371f38 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -30,6 +30,7 @@ namespace ErrorCodes
 {
     extern const int TOO_LARGE_DISTRIBUTED_DEPTH;
     extern const int LOGICAL_ERROR;
+    extern const int CLUSTER_DOESNT_EXIST;
 }
 
 namespace ClusterProxy
@@ -322,11 +323,44 @@ void executeQueryWithParallelReplicas(
     SelectStreamFactory & stream_factory,
     const ASTPtr & query_ast,
     ContextPtr context,
-    std::shared_ptr<const StorageLimitsList> storage_limits,
-    const ClusterPtr & not_optimized_cluster)
+    std::shared_ptr<const StorageLimitsList> storage_limits)
 {
     const auto & settings = context->getSettingsRef();
+
+    /// check cluster for parallel replicas
+    if (settings.cluster_for_parallel_replicas.value.empty())
+    {
+        throw Exception(
+            ErrorCodes::CLUSTER_DOESNT_EXIST,
+            "Reading in parallel from replicas is enabled but cluster to execute query is not provided. Please set "
+            "'cluster_for_parallel_replicas' setting");
+    }
+    auto not_optimized_cluster = context->getCluster(settings.cluster_for_parallel_replicas);
+
     auto new_context = Context::createCopy(context);
+
+    /// check hedged connections setting
+    if (settings.use_hedged_requests.value)
+    {
+        if (settings.use_hedged_requests.changed)
+        {
+            LOG_WARNING(
+                &Poco::Logger::get("executeQueryWithParallelReplicas"),
+                "Setting 'use_hedged_requests' explicitly with enabled 'allow_experimental_parallel_reading_from_replicas' has no effect. "
+                "Hedged connections are not used for parallel reading from replicas");
+        }
+        else
+        {
+            LOG_INFO(
+                &Poco::Logger::get("executeQueryWithParallelReplicas"),
+                "Disabling 'use_hedged_requests' in favor of 'allow_experimental_parallel_reading_from_replicas'. Hedged connections are "
+                "not used for parallel reading from replicas");
+        }
+
+        /// disable hedged connections -> parallel replicas use their own logic to choose replicas
+        new_context->setSetting("use_hedged_requests", Field{false});
+    }
+
     auto scalars = new_context->hasQueryContext() ? new_context->getQueryContext()->getScalars() : Scalars{};
 
     UInt64 shard_num = 0; /// shard_num is 1-based, so 0 - no shard specified
diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h
index 7ffaa3ae62c..2149d8c1640 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.h
+++ b/src/Interpreters/ClusterProxy/executeQuery.h
@@ -71,8 +71,7 @@ void executeQueryWithParallelReplicas(
     SelectStreamFactory & stream_factory,
     const ASTPtr & query_ast,
     ContextPtr context,
-    std::shared_ptr<const StorageLimitsList> storage_limits,
-    const ClusterPtr & not_optimized_cluster);
+    std::shared_ptr<const StorageLimitsList> storage_limits);
 }
 
 }
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index f0f20e171af..79cfe9a9546 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -5020,7 +5020,7 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const
     if (!settings_ref.parallel_replicas_custom_key.value.empty())
         return CUSTOM_KEY;
 
-    if (settings_ref.allow_experimental_parallel_reading_from_replicas > 0 && !settings_ref.use_hedged_requests)
+    if (settings_ref.allow_experimental_parallel_reading_from_replicas > 0)
         return READ_TASKS;
 
     return SAMPLE_KEY;
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index e9a0dd5fbf3..22d72902e8d 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -211,17 +211,12 @@ void StorageMergeTree::read(
 {
     if (local_context->canUseParallelReplicasOnInitiator() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree)
     {
-        auto table_id = getStorageID();
-
+        const auto table_id = getStorageID();
         const auto & modified_query_ast =  ClusterProxy::rewriteSelectQuery(
             local_context, query_info.query,
             table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr);
 
-        String cluster_for_parallel_replicas = local_context->getSettingsRef().cluster_for_parallel_replicas;
-        auto cluster = local_context->getCluster(cluster_for_parallel_replicas);
-
         Block header;
-
         if (local_context->getSettingsRef().allow_experimental_analyzer)
             header = InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze());
         else
@@ -240,17 +235,22 @@ void StorageMergeTree::read(
             select_stream_factory,
             modified_query_ast,
             local_context,
-            query_info.storage_limits,
-            cluster);
+            query_info.storage_limits);
     }
     else
     {
         const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree;
 
         if (auto plan = reader.read(
-            column_names, storage_snapshot, query_info,
-            local_context, max_block_size, num_streams,
-            processed_stage, nullptr, enable_parallel_reading))
+                column_names,
+                storage_snapshot,
+                query_info,
+                local_context,
+                max_block_size,
+                num_streams,
+                processed_stage,
+                nullptr,
+                enable_parallel_reading))
             query_plan = std::move(*plan);
     }
 }
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 4fb21705534..307870aaf4c 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -5338,7 +5338,7 @@ void StorageReplicatedMergeTree::read(
         return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
 
     if (local_context->canUseParallelReplicasOnInitiator())
-        return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
+        return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage);
 
     readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
 }
@@ -5367,18 +5367,11 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl(
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
     ContextPtr local_context,
-    QueryProcessingStage::Enum processed_stage,
-    const size_t /*max_block_size*/,
-    const size_t /*num_streams*/)
+    QueryProcessingStage::Enum processed_stage)
 {
-    auto table_id = getStorageID();
-
-    auto scalars = local_context->hasQueryContext() ? local_context->getQueryContext()->getScalars() : Scalars{};
-    String cluster_for_parallel_replicas = local_context->getSettingsRef().cluster_for_parallel_replicas;
-    auto parallel_replicas_cluster = local_context->getCluster(cluster_for_parallel_replicas);
-
     ASTPtr modified_query_ast;
     Block header;
+
     if (local_context->getSettingsRef().allow_experimental_analyzer)
     {
         auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree);
@@ -5389,6 +5382,7 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl(
     }
     else
     {
+        const auto table_id = getStorageID();
         modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query,
             table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr);
         header
@@ -5407,8 +5401,7 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl(
         select_stream_factory,
         modified_query_ast,
         local_context,
-        query_info.storage_limits,
-        parallel_replicas_cluster);
+        query_info.storage_limits);
 }
 
 void StorageReplicatedMergeTree::readLocalImpl(
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index a8ab8eb7013..159828effcf 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -582,9 +582,7 @@ private:
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
         ContextPtr local_context,
-        QueryProcessingStage::Enum processed_stage,
-        size_t max_block_size,
-        size_t num_streams);
+        QueryProcessingStage::Enum processed_stage);
 
     template <class Func>
     void foreachActiveParts(Func && func, bool select_sequential_consistency) const;
diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
index 7e12da956ea..8af7bb12595 100644
--- a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
+++ b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
@@ -119,7 +119,6 @@ def test_read_equally_from_each_replica(start_cluster, prefer_localhost_replica)
                 "allow_experimental_parallel_reading_from_replicas": 2,
                 "prefer_localhost_replica": prefer_localhost_replica,
                 "max_parallel_replicas": 3,
-                "use_hedged_requests": 0,
             },
         )
         == expected_result
diff --git a/tests/integration/test_parallel_replicas_distributed_skip_shards/test.py b/tests/integration/test_parallel_replicas_distributed_skip_shards/test.py
index 315a9781c8b..af114ade2d7 100644
--- a/tests/integration/test_parallel_replicas_distributed_skip_shards/test.py
+++ b/tests/integration/test_parallel_replicas_distributed_skip_shards/test.py
@@ -84,7 +84,6 @@ def test_skip_unavailable_shards(start_cluster, prefer_localhost_replica):
             settings={
                 "allow_experimental_parallel_reading_from_replicas": 2,
                 "max_parallel_replicas": 3,
-                "use_hedged_requests": 0,
                 "prefer_localhost_replica": prefer_localhost_replica,
                 "skip_unavailable_shards": 1,
                 "connections_with_failover_max_tries": 0,  # just don't wait for unavailable replicas
@@ -119,7 +118,6 @@ def test_error_on_unavailable_shards(start_cluster, prefer_localhost_replica):
             settings={
                 "allow_experimental_parallel_reading_from_replicas": 2,
                 "max_parallel_replicas": 3,
-                "use_hedged_requests": 0,
                 "prefer_localhost_replica": prefer_localhost_replica,
                 "skip_unavailable_shards": 0,
             },
@@ -155,7 +153,6 @@ def test_no_unavailable_shards(start_cluster, skip_unavailable_shards):
             settings={
                 "allow_experimental_parallel_reading_from_replicas": 2,
                 "max_parallel_replicas": 3,
-                "use_hedged_requests": 0,
                 "prefer_localhost_replica": 0,
                 "skip_unavailable_shards": skip_unavailable_shards,
             },
diff --git a/tests/integration/test_parallel_replicas_over_distributed/test.py b/tests/integration/test_parallel_replicas_over_distributed/test.py
index ecfc2ddea63..00b95965b65 100644
--- a/tests/integration/test_parallel_replicas_over_distributed/test.py
+++ b/tests/integration/test_parallel_replicas_over_distributed/test.py
@@ -137,7 +137,6 @@ def test_parallel_replicas_over_distributed(
                 "allow_experimental_parallel_reading_from_replicas": 2,
                 "prefer_localhost_replica": prefer_localhost_replica,
                 "max_parallel_replicas": max_parallel_replicas,
-                "use_hedged_requests": 0,
             },
         )
         == expected_result
diff --git a/tests/integration/test_parallel_replicas_skip_shards/test.py b/tests/integration/test_parallel_replicas_skip_shards/test.py
index 3df80ba061e..a18c82a53a9 100644
--- a/tests/integration/test_parallel_replicas_skip_shards/test.py
+++ b/tests/integration/test_parallel_replicas_skip_shards/test.py
@@ -38,7 +38,6 @@ def test_skip_unavailable_shards(start_cluster):
             settings={
                 "allow_experimental_parallel_reading_from_replicas": 2,
                 "max_parallel_replicas": 3,
-                "use_hedged_requests": 0,
                 "skip_unavailable_shards": 1,
                 # "async_socket_for_remote" : 0,
                 # "async_query_sending_for_remote" : 0,
@@ -65,7 +64,6 @@ def test_error_on_unavailable_shards(start_cluster):
             settings={
                 "allow_experimental_parallel_reading_from_replicas": 2,
                 "max_parallel_replicas": 3,
-                "use_hedged_requests": 0,
                 "skip_unavailable_shards": 0,
             },
         )
diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh
index 941f024825a..bc90f4b2c11 100755
--- a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh
+++ b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh
@@ -17,7 +17,6 @@ opts=(
     --allow_experimental_parallel_reading_from_replicas 1
     --parallel_replicas_for_non_replicated_merge_tree 1
     --max_parallel_replicas 3
-    --use_hedged_requests 0
     --cluster_for_parallel_replicas parallel_replicas
 
     --iterations 1
diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql
index 29c20980c14..fa40c96048c 100644
--- a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql
+++ b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql
@@ -23,7 +23,6 @@ SET allow_experimental_analyzer = 0;
 SET max_parallel_replicas = 3;
 SET prefer_localhost_replica = 1;
 SET cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost';
-SET use_hedged_requests = 0;
 SET joined_subquery_requires_alias = 0;
 
 SELECT '=============== INNER QUERY (NO PARALLEL) ===============';
diff --git a/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.sql b/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.sql
index 2ea2cecc7b5..5ec0a1fcc31 100644
--- a/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.sql
+++ b/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.sql
@@ -18,7 +18,7 @@ INSERT INTO join_inner_table__fuzz_1 SELECT
 FROM generateRandom('number Int64, value1 String, value2 String, time Int64', 1, 10, 2)
 LIMIT 100;
 
-SET max_parallel_replicas = 3,  prefer_localhost_replica = 1, use_hedged_requests = 0, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_parallel_reading_from_replicas = 1;
+SET max_parallel_replicas = 3,  prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_parallel_reading_from_replicas = 1;
 
 -- SELECT query will write a Warning to the logs
 SET send_logs_level='error';
diff --git a/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql b/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql
index aaf68dfd300..9caa6f76e89 100644
--- a/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql
+++ b/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql
@@ -1,7 +1,7 @@
 CREATE TABLE IF NOT EXISTS parallel_replicas_plain (x String) ENGINE=MergeTree() ORDER BY x;
 INSERT INTO parallel_replicas_plain SELECT toString(number) FROM numbers(10);
 
-SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas';
+SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, cluster_for_parallel_replicas='parallel_replicas';
 SET send_logs_level='error';
 SET parallel_replicas_for_non_replicated_merge_tree = 0;
 
diff --git a/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql b/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql
index f447051e1e5..6c121802b06 100644
--- a/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql
+++ b/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql
@@ -2,7 +2,7 @@ CREATE TABLE IF NOT EXISTS parallel_replicas_final (x String) ENGINE=ReplacingMe
 
 INSERT INTO parallel_replicas_final SELECT toString(number) FROM numbers(10);
 
-SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas';
+SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, cluster_for_parallel_replicas='parallel_replicas';
 SET parallel_replicas_for_non_replicated_merge_tree = 1;
 
 SELECT * FROM parallel_replicas_final FINAL FORMAT Null;
diff --git a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql
index 020a429c109..38d592201e3 100644
--- a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql
+++ b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql
@@ -4,7 +4,7 @@ INSERT INTO test_parallel_replicas_unavailable_shards SELECT * FROM numbers(10);
 
 SYSTEM FLUSH LOGS;
 
-SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=11, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1;
+SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=11, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1;
 SET send_logs_level='error';
 SELECT count() FROM test_parallel_replicas_unavailable_shards WHERE NOT ignore(*);
 
diff --git a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
index f688db940d9..35573110550 100644
--- a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
+++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
@@ -8,5 +8,5 @@
 5935810273536892891
 7885388429666205427
 8124171311239967992
-1	1	-- Simple query with analyzer and pure parallel replicas\nSELECT number\nFROM join_inner_table__fuzz_146_replicated\n    SETTINGS\n    allow_experimental_analyzer = 1,\n    max_parallel_replicas = 2,\n    cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\',\n    allow_experimental_parallel_reading_from_replicas = 1,\n    use_hedged_requests = 0;
-0	2	SELECT `join_inner_table__fuzz_146_replicated`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` SETTINGS allow_experimental_analyzer = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\', allow_experimental_parallel_reading_from_replicas = 1, use_hedged_requests = 0
+1	1	-- Simple query with analyzer and pure parallel replicas\nSELECT number\nFROM join_inner_table__fuzz_146_replicated\n    SETTINGS\n    allow_experimental_analyzer = 1,\n    max_parallel_replicas = 2,\n    cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\',\n    allow_experimental_parallel_reading_from_replicas = 1;
+0	2	SELECT `join_inner_table__fuzz_146_replicated`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` SETTINGS allow_experimental_analyzer = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\', allow_experimental_parallel_reading_from_replicas = 1
diff --git a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql
index 35089c0cedb..88a0d2163d6 100644
--- a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql
+++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql
@@ -24,8 +24,7 @@ FROM join_inner_table__fuzz_146_replicated
     allow_experimental_analyzer = 1,
     max_parallel_replicas = 2,
     cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost',
-    allow_experimental_parallel_reading_from_replicas = 1,
-    use_hedged_requests = 0;
+    allow_experimental_parallel_reading_from_replicas = 1;
 
 SYSTEM FLUSH LOGS;
 -- There should be 2 different queries
diff --git a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh
index 9cfd3a392c8..6c697095b57 100755
--- a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh
+++ b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh
@@ -30,7 +30,6 @@ function run_query_with_pure_parallel_replicas () {
         --query_id "${1}_pure" \
         --max_parallel_replicas 3 \
         --prefer_localhost_replica 1 \
-        --use_hedged_requests 0 \
         --cluster_for_parallel_replicas 'test_cluster_one_shard_three_replicas_localhost' \
         --allow_experimental_parallel_reading_from_replicas 1 \
         --allow_experimental_analyzer 0
@@ -40,7 +39,6 @@ function run_query_with_pure_parallel_replicas () {
         --query_id "${1}_pure_analyzer" \
         --max_parallel_replicas 3 \
         --prefer_localhost_replica 1 \
-        --use_hedged_requests 0 \
         --cluster_for_parallel_replicas 'test_cluster_one_shard_three_replicas_localhost' \
         --allow_experimental_parallel_reading_from_replicas 1 \
         --allow_experimental_analyzer 1
@@ -56,7 +54,6 @@ function run_query_with_custom_key_parallel_replicas () {
         --query "$2" \
         --query_id "${1}_custom_key" \
         --max_parallel_replicas 3 \
-        --use_hedged_requests 0 \
         --parallel_replicas_custom_key_filter_type 'default' \
         --parallel_replicas_custom_key "$2" \
         --allow_experimental_analyzer 0
@@ -65,7 +62,6 @@ function run_query_with_custom_key_parallel_replicas () {
         --query "$2" \
         --query_id "${1}_custom_key_analyzer" \
         --max_parallel_replicas 3 \
-        --use_hedged_requests 0 \
         --parallel_replicas_custom_key_filter_type 'default' \
         --parallel_replicas_custom_key "$2" \
         --allow_experimental_analyzer 1
diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh
index 741b51284fe..8a3b34e5cfa 100755
--- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh
+++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh
@@ -49,7 +49,6 @@ function run_query_with_pure_parallel_replicas () {
         --query_id "${1}_pure" \
         --max_parallel_replicas 3 \
         --prefer_localhost_replica 1 \
-        --use_hedged_requests 0 \
         --cluster_for_parallel_replicas "parallel_replicas" \
         --allow_experimental_parallel_reading_from_replicas 1 \
         --parallel_replicas_for_non_replicated_merge_tree 1 \
diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh
index ed68a304b85..baeeb820da5 100755
--- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh
+++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh
@@ -64,7 +64,6 @@ function run_query_with_pure_parallel_replicas () {
         --query_id "${1}_pure" \
         --max_parallel_replicas 3 \
         --prefer_localhost_replica 1 \
-        --use_hedged_requests 0 \
         --cluster_for_parallel_replicas "parallel_replicas" \
         --allow_experimental_parallel_reading_from_replicas 1 \
         --parallel_replicas_for_non_replicated_merge_tree 1 \
diff --git a/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql b/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql
index 374d73d7d03..14edeecf57e 100644
--- a/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql
+++ b/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql
@@ -13,7 +13,6 @@ SET
 skip_unavailable_shards=1,
 allow_experimental_parallel_reading_from_replicas=1,
 max_parallel_replicas=3,
-use_hedged_requests=0,
 cluster_for_parallel_replicas='parallel_replicas',
 parallel_replicas_for_non_replicated_merge_tree=1,
 parallel_replicas_min_number_of_rows_per_replica=1000;
diff --git a/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql
index 60aa5748575..1e6f9304c0c 100644
--- a/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql
+++ b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql
@@ -14,13 +14,13 @@ insert into test select *, today() from numbers(100);
 
 SELECT count(), min(id), max(id), avg(id)
 FROM test_d
-SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0;
+SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1;
 
 insert into test select *, today() from numbers(100);
 
 SELECT count(), min(id), max(id), avg(id)
 FROM test_d
-SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0;
+SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1;
 
 -- 2 shards
 
@@ -38,10 +38,10 @@ insert into test2 select *, today() from numbers(100);
 
 SELECT count(), min(id), max(id), avg(id)
 FROM test2_d
-SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0;
+SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1;
 
 insert into test2 select *, today() from numbers(100);
 
 SELECT count(), min(id), max(id), avg(id)
 FROM test2_d
-SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0;
+SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1;
diff --git a/tests/queries/0_stateless/02841_parallel_replicas_summary.sh b/tests/queries/0_stateless/02841_parallel_replicas_summary.sh
index 792c45b06d6..c82d2c8b0c0 100755
--- a/tests/queries/0_stateless/02841_parallel_replicas_summary.sh
+++ b/tests/queries/0_stateless/02841_parallel_replicas_summary.sh
@@ -36,7 +36,6 @@ echo "
         cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost',
         allow_experimental_parallel_reading_from_replicas = 2,
         parallel_replicas_for_non_replicated_merge_tree = 1,
-        use_hedged_requests = 0,
         interactive_delay=0
     "\
     | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query_id=${query_id_base}_interactive_0" --data-binary @- -vvv 2>&1 \
@@ -51,7 +50,6 @@ echo "
         cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost',
         allow_experimental_parallel_reading_from_replicas = 2,
         parallel_replicas_for_non_replicated_merge_tree = 1,
-        use_hedged_requests = 0,
         interactive_delay=99999999999
     "\
     | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query_id=${query_id_base}_interactive_high" --data-binary @- -vvv 2>&1 \
diff --git a/tests/queries/0_stateless/02861_index_set_incorrect_args.sql b/tests/queries/0_stateless/02861_index_set_incorrect_args.sql
index fa51f5c9abc..17b505cd051 100644
--- a/tests/queries/0_stateless/02861_index_set_incorrect_args.sql
+++ b/tests/queries/0_stateless/02861_index_set_incorrect_args.sql
@@ -2,5 +2,5 @@
 DROP TABLE IF EXISTS set_index__fuzz_41;
 CREATE TABLE set_index__fuzz_41 (`a` Date, `b` Nullable(DateTime64(3)), INDEX b_set b TYPE set(0) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple();
 INSERT INTO set_index__fuzz_41 (a) VALUES (today());
-SELECT b FROM set_index__fuzz_41 WHERE and(b = 256) SETTINGS force_data_skipping_indices = 'b_set', optimize_move_to_prewhere = 0, max_parallel_replicas=2, parallel_replicas_for_non_replicated_merge_tree=1, allow_experimental_parallel_reading_from_replicas=2, use_hedged_requests=0; -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION }
+SELECT b FROM set_index__fuzz_41 WHERE and(b = 256) SETTINGS force_data_skipping_indices = 'b_set', optimize_move_to_prewhere = 0, max_parallel_replicas=2, parallel_replicas_for_non_replicated_merge_tree=1, allow_experimental_parallel_reading_from_replicas=2; -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION }
 DROP TABLE set_index__fuzz_41;
diff --git a/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.sql b/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.sql
index 9559b46fa08..e040fae1fa6 100644
--- a/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.sql
+++ b/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.sql
@@ -24,5 +24,5 @@ system sync replica t3;
 
 SELECT count(), min(k), max(k), avg(k)
 FROM t1
-SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, prefer_localhost_replica = 0, use_hedged_requests=0,
+SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, prefer_localhost_replica = 0,
          cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', parallel_replicas_single_task_marks_count_multiplier = 0.001;
diff --git a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
index 1201a156246..f59d38ceb04 100644
--- a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
+++ b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
@@ -2,7 +2,7 @@ DROP TABLE IF EXISTS tt;
 CREATE TABLE tt (n UInt64) ENGINE=MergeTree() ORDER BY tuple();
 INSERT INTO tt SELECT * FROM numbers(10);
 
-SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, use_hedged_requests=0, parallel_replicas_for_non_replicated_merge_tree=1;
+SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, parallel_replicas_for_non_replicated_merge_tree=1;
 SELECT count() FROM clusterAllReplicas('test_cluster_two_shard_three_replicas_localhost', currentDatabase(), tt) settings log_comment='02875_190aed82-2423-413b-ad4c-24dcca50f65b';
 
 SYSTEM FLUSH LOGS;
diff --git a/tests/queries/0_stateless/02875_parallel_replicas_remote.sql b/tests/queries/0_stateless/02875_parallel_replicas_remote.sql
index f47fc559df9..5fbaf34b621 100644
--- a/tests/queries/0_stateless/02875_parallel_replicas_remote.sql
+++ b/tests/queries/0_stateless/02875_parallel_replicas_remote.sql
@@ -2,7 +2,7 @@ DROP TABLE IF EXISTS tt;
 CREATE TABLE tt (n UInt64) ENGINE=MergeTree() ORDER BY tuple();
 INSERT INTO tt SELECT * FROM numbers(10);
 
-SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, use_hedged_requests=0, parallel_replicas_for_non_replicated_merge_tree=1;
+SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, parallel_replicas_for_non_replicated_merge_tree=1;
 SELECT count() FROM remote('127.0.0.{1..6}', currentDatabase(), tt) settings log_comment='02875_89f3c39b-1919-48cb-b66e-ef9904e73146';
 
 SYSTEM FLUSH LOGS;
diff --git a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql
index 70a1cedf663..6b2f146efd0 100644
--- a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql
+++ b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql
@@ -14,7 +14,7 @@ system sync replica t1;
 system sync replica t2;
 system sync replica t3;
 
-SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, use_hedged_requests=0, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost';
+SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost';
 
 -- default coordinator
 SELECT count(), min(k), max(k), avg(k) FROM t1 SETTINGS log_comment='02898_default_190aed82-2423-413b-ad4c-24dcca50f65b';
diff --git a/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh b/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh
index f23b80348c1..9c922ec4723 100755
--- a/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh
+++ b/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh
@@ -29,7 +29,6 @@ $CLICKHOUSE_CLIENT \
   --query_id "${query_id}" \
   --max_parallel_replicas 3 \
   --prefer_localhost_replica 1 \
-  --use_hedged_requests 0 \
   --cluster_for_parallel_replicas "parallel_replicas" \
   --allow_experimental_parallel_reading_from_replicas 1 \
   --parallel_replicas_for_non_replicated_merge_tree 1 \
@@ -63,7 +62,6 @@ $CLICKHOUSE_CLIENT \
   --query_id "${query_id}" \
   --max_parallel_replicas 3 \
   --prefer_localhost_replica 1 \
-  --use_hedged_requests 0 \
   --cluster_for_parallel_replicas "parallel_replicas" \
   --allow_experimental_parallel_reading_from_replicas 1 \
   --parallel_replicas_for_non_replicated_merge_tree 1 \
diff --git a/tests/queries/0_stateless/02935_parallel_replicas_settings.reference b/tests/queries/0_stateless/02935_parallel_replicas_settings.reference
new file mode 100644
index 00000000000..846d77bfa57
--- /dev/null
+++ b/tests/queries/0_stateless/02935_parallel_replicas_settings.reference
@@ -0,0 +1,4 @@
+10
+1
+10
+1
diff --git a/tests/queries/0_stateless/02935_parallel_replicas_settings.sql b/tests/queries/0_stateless/02935_parallel_replicas_settings.sql
new file mode 100644
index 00000000000..be6f1c2958c
--- /dev/null
+++ b/tests/queries/0_stateless/02935_parallel_replicas_settings.sql
@@ -0,0 +1,35 @@
+DROP TABLE IF EXISTS test_parallel_replicas_settings;
+CREATE TABLE test_parallel_replicas_settings (n UInt64) ENGINE=MergeTree() ORDER BY tuple();
+INSERT INTO test_parallel_replicas_settings SELECT * FROM numbers(10);
+
+SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=3, parallel_replicas_for_non_replicated_merge_tree=1;
+
+SET cluster_for_parallel_replicas=''; -- empty cluster name: the next query is expected to fail
+SELECT count() FROM test_parallel_replicas_settings WHERE NOT ignore(*); -- { serverError CLUSTER_DOESNT_EXIST }
+
+SET cluster_for_parallel_replicas='parallel_replicas';
+SELECT count() FROM test_parallel_replicas_settings WHERE NOT ignore(*) settings log_comment='0_f621c4f2-4da7-4a7c-bb6d-052c442d0f7f';
+
+SYSTEM FLUSH LOGS;
+
+SELECT count() > 0 FROM system.text_log -- was the Information-level message logged for the query tagged '0_...' above?
+WHERE yesterday() <= event_date
+      AND query_id in (select query_id from system.query_log where current_database=currentDatabase() AND log_comment='0_f621c4f2-4da7-4a7c-bb6d-052c442d0f7f')
+      AND level = 'Information'
+      AND message ILIKE '%Disabling ''use_hedged_requests'' in favor of ''allow_experimental_parallel_reading_from_replicas''%'
+SETTINGS allow_experimental_parallel_reading_from_replicas=0;
+
+SET use_hedged_requests=1; -- set explicitly this time; the check below looks for a Warning-level message instead
+SELECT count() FROM test_parallel_replicas_settings WHERE NOT ignore(*) settings log_comment='1_f621c4f2-4da7-4a7c-bb6d-052c442d0f7f';
+
+SYSTEM FLUSH LOGS;
+
+SET allow_experimental_parallel_reading_from_replicas=0;
+SELECT count() > 0 FROM system.text_log -- was the Warning-level message logged for the query tagged '1_...' above?
+WHERE yesterday() <= event_date
+      AND query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment = '1_f621c4f2-4da7-4a7c-bb6d-052c442d0f7f')
+      AND level = 'Warning'
+      AND message ILIKE '%Setting ''use_hedged_requests'' explicitly with enabled ''allow_experimental_parallel_reading_from_replicas'' has no effect%'
+SETTINGS allow_experimental_parallel_reading_from_replicas=0;
+
+DROP TABLE test_parallel_replicas_settings;
diff --git a/tests/queries/1_stateful/00177_memory_bound_merging.sh b/tests/queries/1_stateful/00177_memory_bound_merging.sh
index ce889b338d6..d5cd1a05cd8 100755
--- a/tests/queries/1_stateful/00177_memory_bound_merging.sh
+++ b/tests/queries/1_stateful/00177_memory_bound_merging.sh
@@ -31,7 +31,7 @@ test1() {
         GROUP BY CounterID, URL, EventDate
         ORDER BY URL, EventDate
         LIMIT 5 OFFSET 10
-        SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, use_hedged_requests = 0"
+        SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3"
     check_replicas_read_in_order $query_id
 }
 
@@ -48,7 +48,7 @@ test2() {
         GROUP BY URL, EventDate
         ORDER BY URL, EventDate
         LIMIT 5 OFFSET 10
-        SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, use_hedged_requests = 0, query_plan_aggregation_in_order = 1"
+        SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, query_plan_aggregation_in_order = 1"
     check_replicas_read_in_order $query_id
 }
 
@@ -64,7 +64,7 @@ test3() {
             FROM test.hits
             WHERE CounterID = 1704509 AND UserID = 4322253409885123546
             GROUP BY URL, EventDate
-            SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, use_hedged_requests = 0
+            SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3
         )
         WHERE explain LIKE '%Aggr%Transform%' OR explain LIKE '%InOrder%'"
 }

From fa7190805a64aa447b9bb568db26f614f15adfb5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Wed, 13 Dec 2023 17:59:55 +0100
Subject: [PATCH 203/213] A timeout

---
 docker/test/clickbench/run.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index 921d2023fd7..ba9580f55ae 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -1,5 +1,8 @@
 #!/bin/bash
 
+SCRIPT_PID=$$  # PID of this script; "$!" (last background job) is still empty at this point
+(sleep 1200 && kill -9 "$SCRIPT_PID") &  # watchdog: hard-kill the script after 20 minutes
+
 # shellcheck disable=SC1091
 source /setup_export_logs.sh
 

From 1d70c9464e6e23dde1e5a9370bb9ae1d57bec9c9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Wed, 13 Dec 2023 18:01:18 +0100
Subject: [PATCH 204/213] Add it to master checks

---
 .github/workflows/master.yml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 05654926fd7..771de46be13 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -555,6 +555,27 @@ jobs:
         cd "$REPO_COPY/tests/ci"
         python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
 ##############################################################################################
+########################### ClickBench #######################################################
+##############################################################################################
+  ClickBenchAMD64:
+    needs: [BuilderDebRelease]
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: ClickBench (amd64)
+      runner_type: func-tester
+      run_command: |
+        cd "$REPO_COPY/tests/ci"
+        python3 clickbench.py "$CHECK_NAME"
+  ClickBenchAarch64:
+    needs: [BuilderDebAarch64]
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: ClickBench (aarch64)
+      runner_type: func-tester-aarch64
+      run_command: |
+        cd "$REPO_COPY/tests/ci"
+        python3 clickbench.py "$CHECK_NAME"
+##############################################################################################
 ######################################### STRESS TESTS #######################################
 ##############################################################################################
   StressTestAsan:

From 055c2314381edba5e9f946f7450f886d09024ba0 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 13 Dec 2023 17:59:53 +0100
Subject: [PATCH 205/213] Disable system.kafka_consumers by default (due to
 possible live memory leak)

It is not safe to use statistics because of how KafkaEngine works - it
pre-creates consumers, and this leads to a situation where statistics
entries (RD_KAFKA_OP_STATS) are generated but never consumed.

This creates a live memory leak on a server that has Kafka tables with
no materialized view attached to them (and no SELECT from them).

Another problem is that this makes shutdown very slow because of how
pending queue entries are handled in librdkafka: it uses
TAILQ_INSERT_SORTED, which is a sorted insert into a linked list and
works incredibly slowly (most likely you will not wait until it
finishes and will kill the server instead).

For instance in my production setup the server was running for ~67 days
with such table, and it got 1'942'233 `TAILQ_INSERT_SORTED` entries
(which perfectly matches by the way - `67*86400/3` = 1'929'600), and it
moved only 289'806 entries for a few hours, though I'm not sure how much
time the process was in the running state, since most of the time it was
with debugger attached.

So for now let's disable it, to make this patch easy for backporting,
and I will think about long term fix - do not pre-create consumers in
Kafka engine.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Storages/Kafka/StorageKafka.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp
index c17defca673..34138b2237f 100644
--- a/src/Storages/Kafka/StorageKafka.cpp
+++ b/src/Storages/Kafka/StorageKafka.cpp
@@ -661,10 +661,19 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config,
 
     if (kafka_consumer_weak_ptr_ptr)
     {
+        /// NOTE: statistics should be consumed, otherwise it creates too many
+        /// entries in the queue, which leads to a memory leak and slow shutdown.
+        ///
+        /// This is the case when you have kafka table but no SELECT from it or
+        /// materialized view attached.
+        ///
+        /// So for now it is disabled by default, until properly fixed.
+#if 0
         if (!config.has(config_prefix + "." + "statistics_interval_ms"))
         {
             kafka_config.set("statistics.interval.ms", "3000"); // every 3 seconds by default. set to 0 to disable.
         }
+#endif
 
         if (kafka_config.get("statistics.interval.ms") != "0")
         {

From 30e6797c7e63488056405dc69c8f46ec22d737fd Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Wed, 13 Dec 2023 17:19:14 +0000
Subject: [PATCH 206/213] Update version_date.tsv and changelogs after
 v23.11.2.11-stable

---
 docker/keeper/Dockerfile              |  2 +-
 docker/server/Dockerfile.alpine       |  2 +-
 docker/server/Dockerfile.ubuntu       |  2 +-
 docs/changelogs/v23.11.2.11-stable.md | 22 ++++++++++++++++++++++
 utils/list-versions/version_date.tsv  |  1 +
 5 files changed, 26 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v23.11.2.11-stable.md

diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 8fc639af1a7..a238a9851d9 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.11.1.2711"
+ARG VERSION="23.11.2.11"
 ARG PACKAGES="clickhouse-keeper"
 
 # user/group precreated explicitly with fixed uid/gid on purpose.
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 76b03218eab..31dbc38708f 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.11.1.2711"
+ARG VERSION="23.11.2.11"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # user/group precreated explicitly with fixed uid/gid on purpose.
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index c6dfcf9f679..6bbec625300 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.11.1.2711"
+ARG VERSION="23.11.2.11"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v23.11.2.11-stable.md b/docs/changelogs/v23.11.2.11-stable.md
new file mode 100644
index 00000000000..490cc9a4590
--- /dev/null
+++ b/docs/changelogs/v23.11.2.11-stable.md
@@ -0,0 +1,22 @@
+---
+sidebar_position: 1
+sidebar_label: 2023
+---
+
+# 2023 Changelog
+
+### ClickHouse release v23.11.2.11-stable (6e5411358c8) FIXME as compared to v23.11.1.2711-stable (05bc8ef1e02)
+
+#### Improvement
+* Backported in [#57661](https://github.com/ClickHouse/ClickHouse/issues/57661): Handle SIGABRT case when getting PostgreSQL table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities.  [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Fix SIGSEGV for aggregation of sparse columns with any() RESPECT NULL [#57710](https://github.com/ClickHouse/ClickHouse/pull/57710) ([Azat Khuzhin](https://github.com/azat)).
+* Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631) [#57766](https://github.com/ClickHouse/ClickHouse/pull/57766) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index cb4102b3072..f319f57e0b9 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,3 +1,4 @@
+v23.11.2.11-stable	2023-12-13
 v23.11.1.2711-stable	2023-12-06
 v23.10.5.20-stable	2023-11-25
 v23.10.4.25-stable	2023-11-17

From 560e66f3ca5f76475b18ef4d6a3efe2b9a21a9af Mon Sep 17 00:00:00 2001
From: Nikita Taranov <nikita.taranov@clickhouse.com>
Date: Wed, 13 Dec 2023 19:51:49 +0100
Subject: [PATCH 207/213] More respect to `min_number_of_marks` in
 `ParallelReplicasReadingCoordinator` (#57763)

---
 .../ParallelReplicasReadingCoordinator.cpp    | 62 +++++++++----------
 1 file changed, 29 insertions(+), 33 deletions(-)

diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
index 9137dc89705..757d1461769 100644
--- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
+++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
@@ -300,20 +300,20 @@ void DefaultCoordinator::selectPartsAndRanges(const PartRefs & container, size_t
         while (!part->description.ranges.empty() && current_mark_size < min_number_of_marks)
         {
             auto & range = part->description.ranges.front();
+            const size_t needed = min_number_of_marks - current_mark_size;
 
-            if (range.getNumberOfMarks() > min_number_of_marks)
+            if (range.getNumberOfMarks() > needed)
             {
-                auto new_range = range;
-                range.begin += min_number_of_marks;
-                new_range.end = new_range.begin + min_number_of_marks;
+                auto range_we_take = MarkRange{range.begin, range.begin + needed};
+                response.description.back().ranges.emplace_back(range_we_take);
+                current_mark_size += range_we_take.getNumberOfMarks();
 
-                response.description.back().ranges.emplace_back(new_range);
-                current_mark_size += new_range.getNumberOfMarks();
-                continue;
+                range.begin += needed;
+                break;
             }
 
-            current_mark_size += part->description.ranges.front().getNumberOfMarks();
-            response.description.back().ranges.emplace_back(part->description.ranges.front());
+            response.description.back().ranges.emplace_back(range);
+            current_mark_size += range.getNumberOfMarks();
             part->description.ranges.pop_front();
         }
     }
@@ -473,23 +473,21 @@ ParallelReadResponse InOrderCoordinator<mode>::handleRequest(ParallelReadRequest
         {
             while (!global_part_it->description.ranges.empty() && current_mark_size < request.min_number_of_marks)
             {
-                auto range = global_part_it->description.ranges.back();
+                auto & range = global_part_it->description.ranges.back();
+                const size_t needed = request.min_number_of_marks - current_mark_size;
 
-                if (range.getNumberOfMarks() > request.min_number_of_marks)
+                if (range.getNumberOfMarks() > needed)
                 {
-                    auto new_range = range;
-                    range.end -= request.min_number_of_marks;
-                    new_range.begin = new_range.end - request.min_number_of_marks;
+                    auto range_we_take = MarkRange{range.end - needed, range.end};
+                    part.ranges.emplace_front(range_we_take);
+                    current_mark_size += range_we_take.getNumberOfMarks();
 
-                    global_part_it->description.ranges.back() = range;
-
-                    part.ranges.emplace_front(new_range);
-                    current_mark_size += new_range.getNumberOfMarks();
-                    continue;
+                    range.end -= needed;
+                    break;
                 }
 
-                current_mark_size += global_part_it->description.ranges.back().getNumberOfMarks();
-                part.ranges.emplace_front(global_part_it->description.ranges.back());
+                part.ranges.emplace_front(range);
+                current_mark_size += range.getNumberOfMarks();
                 global_part_it->description.ranges.pop_back();
             }
         }
@@ -497,23 +495,21 @@ ParallelReadResponse InOrderCoordinator<mode>::handleRequest(ParallelReadRequest
         {
             while (!global_part_it->description.ranges.empty() && current_mark_size < request.min_number_of_marks)
             {
-                auto range = global_part_it->description.ranges.front();
+                auto & range = global_part_it->description.ranges.front();
+                const size_t needed = request.min_number_of_marks - current_mark_size;
 
-                if (range.getNumberOfMarks() > request.min_number_of_marks)
+                if (range.getNumberOfMarks() > needed)
                 {
-                    auto new_range = range;
-                    range.begin += request.min_number_of_marks;
-                    new_range.end = new_range.begin + request.min_number_of_marks;
+                    auto range_we_take = MarkRange{range.begin, range.begin + needed};
+                    part.ranges.emplace_back(range_we_take);
+                    current_mark_size += range_we_take.getNumberOfMarks();
 
-                    global_part_it->description.ranges.front() = range;
-
-                    part.ranges.emplace_back(new_range);
-                    current_mark_size += new_range.getNumberOfMarks();
-                    continue;
+                    range.begin += needed;
+                    break;
                 }
 
-                current_mark_size += global_part_it->description.ranges.front().getNumberOfMarks();
-                part.ranges.emplace_back(global_part_it->description.ranges.front());
+                part.ranges.emplace_back(range);
+                current_mark_size += range.getNumberOfMarks();
                 global_part_it->description.ranges.pop_front();
             }
         }

From b8d274d070b89bdfee578492f8210cd96859fdd8 Mon Sep 17 00:00:00 2001
From: Julia Kartseva <julia.kartseva@clickhouse.com>
Date: Wed, 13 Dec 2023 10:59:31 -0800
Subject: [PATCH 208/213] Add malformed output generation to JSON fuzzer
 (#57646)

Randomly modify structural characters of a valid JSON ('{', '}', '[', ']',
':', '"', ',') to generate output that cannot be parsed as JSON.

Follow-up to https://github.com/ClickHouse/ClickHouse/pull/56490
---
 .../sql-reference/table-functions/fuzzJSON.md | 11 ++++
 src/Storages/StorageFuzzJSON.cpp              | 48 ++++++++++----
 src/Storages/StorageFuzzJSON.h                |  1 +
 .../02918_fuzzjson_table_function.reference   |  1 +
 .../02918_fuzzjson_table_function.sql         | 65 +++++++++++++++++--
 5 files changed, 107 insertions(+), 19 deletions(-)

diff --git a/docs/en/sql-reference/table-functions/fuzzJSON.md b/docs/en/sql-reference/table-functions/fuzzJSON.md
index 74ccb0bcb8a..a64f35691f6 100644
--- a/docs/en/sql-reference/table-functions/fuzzJSON.md
+++ b/docs/en/sql-reference/table-functions/fuzzJSON.md
@@ -19,6 +19,7 @@ fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] })
  - `json_str` (String) - The source string representing structured data in JSON format.
  - `random_seed` (UInt64) - Manual random seed for producing stable results.
  - `reuse_output` (boolean) - Reuse the output from a fuzzing process as input for the next fuzzer.
+ - `malform_output` (boolean) - Generate a string that cannot be parsed as a JSON object.
  - `max_output_length` (UInt64) - Maximum allowable length of the generated or perturbed JSON string.
  - `probability` (Float64) - The probability to fuzz a JSON field (a key-value pair). Must be within [0, 1] range.
  - `max_nesting_level` (UInt64) - The maximum allowed depth of nested structures within the JSON data.
@@ -84,3 +85,13 @@ SELECT * FROM fuzzJSON('{"id":1}', 1234) LIMIT 3;
 {"BRjE":16137826149911306846}
 {"XjKE":15076727133550123563}
 ```
+
+``` sql
+SELECT * FROM fuzzJSON(json_nc, json_str='{"name" : "FuzzJSON"}', random_seed=1337, malform_output=true) LIMIT 3;
+```
+
+``` text
+U"name":"FuzzJSON*"SpByjZKtr2VAyHCO"falseh
+{"name"keFuzzJSON, "g6vVO7TCIk":jTt^
+{"DBhz":YFuzzJSON5}
+```
diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp
index 8b2dd7ac692..6bf69efa1dd 100644
--- a/src/Storages/StorageFuzzJSON.cpp
+++ b/src/Storages/StorageFuzzJSON.cpp
@@ -248,10 +248,10 @@ Field generateRandomFixedValue(const StorageFuzzJSON::Configuration & config, pc
     return f;
 }
 
-String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & source)
+String fuzzString(UInt64 min_length, UInt64 max_length, pcg64 & rnd, const String & source, std::function<char(pcg64 &)> charGen)
 {
     String result;
-    result.reserve(config.max_key_length);
+    result.reserve(max_length);
 
     using FA = FuzzAction;
     auto get_action = [&]() -> FuzzAction
@@ -261,7 +261,7 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c
     };
 
     size_t i = 0;
-    while (i < source.size() && result.size() < config.max_key_length)
+    while (i < source.size() && result.size() < max_length)
     {
         auto action = get_action();
         switch (action)
@@ -271,12 +271,12 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c
             }
             break;
             case FA::Edit: {
-                result.push_back(generateRandomKeyCharacter(rnd));
+                result.push_back(charGen(rnd));
                 ++i;
             }
             break;
             case FA::Add: {
-                result.push_back(generateRandomKeyCharacter(rnd));
+                result.push_back(charGen(rnd));
             }
             break;
             default:
@@ -284,12 +284,24 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c
         }
     }
 
-    while (result.size() < config.min_key_length)
-        result.push_back(generateRandomKeyCharacter(rnd));
+    while (result.size() < min_length)
+        result.push_back(charGen(rnd));
 
     return result;
 }
 
+String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & key)
+{
+    return fuzzString(config.min_key_length, config.max_key_length, rnd, key, generateRandomKeyCharacter);
+}
+
+// Randomly modify structural characters (e.g. '{', '}', '[', ']', ':', '"', ',') to generate output that cannot be parsed as JSON.
+String fuzzJSONStructure(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & s)
+{
+    return config.should_malform_output ? fuzzString(/*min_length*/ 0, /*max_length*/ s.size(), rnd, s, generateRandomStringValueCharacter)
+                                        : s; // malforming disabled: emit the token unchanged
+}
+
 std::shared_ptr<JSONNode>
 generateRandomJSONNode(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, bool with_key, JSONValue::Type type)
 {
@@ -397,7 +409,7 @@ void fuzzJSONObject(
     if (next_node->key)
     {
         writeDoubleQuoted(*next_node->key, out);
-        out << ":";
+        out << fuzzJSONStructure(config, rnd, ":");
     }
 
     auto & val = next_node->value;
@@ -405,7 +417,11 @@ void fuzzJSONObject(
     if (val.fixed)
     {
         if (val.fixed->getType() == Field::Types::Which::String)
-            writeDoubleQuoted(val.fixed->get<String>(), out);
+        {
+            out << fuzzJSONStructure(config, rnd, "\"");
+            writeText(val.fixed->get<String>(), out);
+            out << fuzzJSONStructure(config, rnd, "\"");
+        }
         else
             writeFieldText(*val.fixed, out);
     }
@@ -414,9 +430,9 @@ void fuzzJSONObject(
         if (!val.array && !val.object)
             return;
 
-        const auto & [op, cl, node_list] = val.array ? std::make_tuple('[', ']', *val.array) : std::make_tuple('{', '}', *val.object);
+        const auto & [op, cl, node_list] = val.array ? std::make_tuple("[", "]", *val.array) : std::make_tuple("{", "}", *val.object);
 
-        out << op;
+        out << fuzzJSONStructure(config, rnd, op);
 
         bool first = true;
         for (const auto & ptr : node_list)
@@ -426,7 +442,7 @@ void fuzzJSONObject(
 
             WriteBufferFromOwnString child_out;
             if (!first)
-                child_out << ", ";
+                child_out << fuzzJSONStructure(config, rnd, ", ");
             first = false;
 
             fuzzJSONObject(ptr, child_out, config, rnd, depth + 1, node_count);
@@ -435,7 +451,7 @@ void fuzzJSONObject(
                 break;
             out << child_out.str();
         }
-        out << cl;
+        out << fuzzJSONStructure(config, rnd, cl);
     }
 }
 
@@ -554,10 +570,11 @@ Pipe StorageFuzzJSON::read(
     return Pipe::unitePipes(std::move(pipes));
 }
 
-static constexpr std::array<std::string_view, 13> optional_configuration_keys
+static constexpr std::array<std::string_view, 14> optional_configuration_keys
     = {"json_str",
        "random_seed",
        "reuse_output",
+       "malform_output",
        "probability",
        "max_output_length",
        "max_nesting_level",
@@ -583,6 +600,9 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration
     if (collection.has("reuse_output"))
         configuration.should_reuse_output = static_cast<bool>(collection.get<UInt64>("reuse_output"));
 
+    if (collection.has("malform_output"))
+        configuration.should_malform_output = static_cast<bool>(collection.get<UInt64>("malform_output"));
+
     if (collection.has("probability"))
     {
         configuration.probability = collection.get<Float64>("probability");
diff --git a/src/Storages/StorageFuzzJSON.h b/src/Storages/StorageFuzzJSON.h
index f1d78fba85c..027c5a98634 100644
--- a/src/Storages/StorageFuzzJSON.h
+++ b/src/Storages/StorageFuzzJSON.h
@@ -27,6 +27,7 @@ public:
         String json_str = "{}";
         UInt64 random_seed = randomSeed();
         bool should_reuse_output = false;
+        bool should_malform_output = false;
         Float64 probability = 0.25;
 
         UInt64 max_output_length = 1024;
diff --git a/tests/queries/0_stateless/02918_fuzzjson_table_function.reference b/tests/queries/0_stateless/02918_fuzzjson_table_function.reference
index 1b5c6f46f77..8ad9e886b49 100644
--- a/tests/queries/0_stateless/02918_fuzzjson_table_function.reference
+++ b/tests/queries/0_stateless/02918_fuzzjson_table_function.reference
@@ -150,3 +150,4 @@
 {}
 730
 200
+50
diff --git a/tests/queries/0_stateless/02918_fuzzjson_table_function.sql b/tests/queries/0_stateless/02918_fuzzjson_table_function.sql
index 6db0c69dbac..398b3572587 100644
--- a/tests/queries/0_stateless/02918_fuzzjson_table_function.sql
+++ b/tests/queries/0_stateless/02918_fuzzjson_table_function.sql
@@ -92,15 +92,70 @@ SELECT * FROM fuzzJSON(02918_json_fuzzer, max_key_length=10, min_key_length=0) L
 SELECT * FROM fuzzJSON(02918_json_fuzzer, max_key_length=10, min_key_length=11) LIMIT 10; -- { serverError BAD_ARGUMENTS }
 
 --
-DROP TABLE IF EXISTS 02918_table_obj;
-CREATE TABLE 02918_table_obj (json_obj Object('json')) Engine=Memory;
+DROP TABLE IF EXISTS 02918_table_obj1;
+CREATE TABLE 02918_table_obj1 (json_obj Object('json')) Engine=Memory;
 
-INSERT INTO 02918_table_obj SELECT * FROM fuzzJSON(
+INSERT INTO 02918_table_obj1 SELECT * FROM fuzzJSON(
     02918_json_fuzzer,
     json_str='{"name": "John Doe", "age": 27, "address": {"city": "Citiville", "zip": "12345"}, "hobbies": ["reading", "traveling", "coding"]}',
     random_seed=12345) LIMIT 200;
-SELECT count() FROM 02918_table_obj;
 
-DROP TABLE IF EXISTS 02918_table_obj;
+SELECT count() FROM 02918_table_obj1;
+
+DROP TABLE IF EXISTS 02918_table_obj1;
+
+--
+DROP TABLE IF EXISTS 02918_table_obj2;
+CREATE TABLE 02918_table_obj2 (json_obj Object('json')) Engine=Memory;
+
+INSERT INTO 02918_table_obj2 SELECT * FROM fuzzJSON(
+    02918_json_fuzzer,
+    json_str=
+    '{
+      "name": {
+        "first": "Joan",
+        "last": "of Arc"
+      },
+      "birth": {"date": "January 6, 1412", "place": "Domremy, France"},
+      "death": {"date": "May 30, 1431", "place": "Rouen, France"},
+      "occupation": "Military Leader",
+      "achievements": ["Lifted Siege of Orleans", "Assisted in Charles VII\'s Coronation"],
+      "legacy": {
+        "honors": ["Canonized Saint", "National Heroine of France"],
+        "memorials": [
+        {"name": "Joan of Arc Memorial", "location": "Domremy"},
+        {"name": "Place Jeanne d\'Arc", "location": "Rouen"}
+        ]
+      }
+    }',
+    random_seed=12345,
+    max_output_length=1024) LIMIT 50;
+
+INSERT INTO 02918_table_obj2 SELECT * FROM fuzzJSON(
+    02918_json_fuzzer,
+    json_str=
+    '{
+      "name": {
+        "first": "Joan",
+        "last": "of Arc"
+      },
+      "birth": {"date": "January 6, 1412", "place": "Domremy, France"},
+      "death": {"date": "May 30, 1431", "place": "Rouen, France"},
+      "occupation": "Military Leader",
+      "achievements": ["Lifted Siege of Orleans", "Assisted in Charles VII\'s Coronation"],
+      "legacy": {
+        "honors": ["Canonized Saint", "National Heroine of France"],
+        "memorials": [
+        {"name": "Joan of Arc Memorial", "location": "Domremy"},
+        {"name": "Place Jeanne d\'Arc", "location": "Rouen"}
+        ]
+      }
+    }',
+    random_seed=12345,
+    max_output_length=1024, malform_output=true) LIMIT 50; -- {serverError INCORRECT_DATA }
+
+SELECT count() FROM 02918_table_obj2;
+
+DROP TABLE IF EXISTS 02918_table_obj2;
 
 DROP NAMED COLLECTION IF EXISTS 02918_json_fuzzer;

From 781ba523764e0d477bd2552038457565f444b835 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Wed, 13 Dec 2023 22:12:53 +0100
Subject: [PATCH 209/213] ClickBench: slightly better

---
 docker/test/clickbench/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh
index ba9580f55ae..3d27a40bb74 100755
--- a/docker/test/clickbench/run.sh
+++ b/docker/test/clickbench/run.sh
@@ -55,7 +55,7 @@ QUERY_NUM=1
 while read -r query; do
     echo -n "["
     for i in $(seq 1 $TRIES); do
-        RES=$(clickhouse-client --time --format Null --query "$query" --progress 0 2>&1 ||:)
+        RES=$(clickhouse-client --query_id "q${QUERY_NUM}-${i}" --time --format Null --query "$query" --progress 0 2>&1 ||:)
         echo -n "${RES}"
         [[ "$i" != "$TRIES" ]] && echo -n ", "
 

From c29007beeb876819f8673f237056c1334cceb7fd Mon Sep 17 00:00:00 2001
From: andrewzolotukhin <andrewzolotukhin@gmail.com>
Date: Thu, 14 Dec 2023 04:54:52 +0200
Subject: [PATCH 210/213] Fixed typo in string-functions.md

---
 docs/en/sql-reference/functions/string-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index fffbea31d0d..e9db47a5c4c 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -1251,7 +1251,7 @@ This function also replaces numeric character references with Unicode characters
 **Syntax**
 
 ``` sql
-decodeHTMComponent(x)
+decodeHTMLComponent(x)
 ```
 
 **Arguments**

From 15dc0ed610998b847cb0752f5721c55d538fb629 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Thu, 14 Dec 2023 04:34:32 +0100
Subject: [PATCH 211/213] Remove `arrayFold`

---
 .../functions/array-functions.md              |  54 ----
 src/Functions/array/arrayFold.cpp             | 236 ------------------
 tests/performance/array_fold.xml              |   5 -
 .../0_stateless/02718_array_fold.reference    |  25 --
 .../queries/0_stateless/02718_array_fold.sql  |  24 --
 5 files changed, 344 deletions(-)
 delete mode 100644 src/Functions/array/arrayFold.cpp
 delete mode 100644 tests/performance/array_fold.xml
 delete mode 100644 tests/queries/0_stateless/02718_array_fold.reference
 delete mode 100644 tests/queries/0_stateless/02718_array_fold.sql

diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 00efa63c960..a058e1db6b4 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -1081,10 +1081,6 @@ Result:
 └─────────────────────────────────────────────────────────────┘
 ```
 
-**See also**
-
-- [arrayFold](#arrayfold)
-
 ## arrayReduceInRanges
 
 Applies an aggregate function to array elements in given ranges and returns an array containing the result corresponding to each range. The function will return the same result as multiple `arrayReduce(agg_func, arraySlice(arr1, index, length), ...)`.
@@ -1127,56 +1123,6 @@ Result:
 └─────────────────────────────┘
 ```
 
-## arrayFold
-
-Applies a lambda function to one or more equally-sized arrays and collects the result in an accumulator.
-
-**Syntax**
-
-``` sql
-arrayFold(lambda_function, arr1, arr2, ..., accumulator)
-```
-
-**Example**
-
-Query:
-
-``` sql
-SELECT arrayFold( acc,x -> acc + x*2,  [1, 2, 3, 4], toInt64(3)) AS res;
-```
-
-Result:
-
-``` text
-┌─res─┐
-│  23 │
-└─────┘
-```
-
-**Example with the Fibonacci sequence**
-
-```sql
-SELECT arrayFold( acc,x -> (acc.2, acc.2 + acc.1), range(number), (1::Int64, 0::Int64)).1 AS fibonacci
-FROM numbers(1,10);
-
-┌─fibonacci─┐
-│         0 │
-│         1 │
-│         1 │
-│         2 │
-│         3 │
-│         5 │
-│         8 │
-│        13 │
-│        21 │
-│        34 │
-└───────────┘
-```
-
-**See also**
-
-- [arrayReduce](#arrayreduce)
-
 ## arrayReverse(arr)
 
 Returns an array of the same size as the original array containing the elements in reverse order.
diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp
deleted file mode 100644
index b5b650e7289..00000000000
--- a/src/Functions/array/arrayFold.cpp
+++ /dev/null
@@ -1,236 +0,0 @@
-#include "FunctionArrayMapped.h"
-#include <Functions/FunctionFactory.h>
-#include <Common/Exception.h>
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_COLUMN;
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-    extern const int SIZES_OF_ARRAYS_DONT_MATCH;
-    extern const int TYPE_MISMATCH;
-}
-
-/**
- * arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, accum_initial) - apply the expression to each element of the array (or set of arrays).
- */
-class ArrayFold : public IFunction
-{
-public:
-    static constexpr auto name = "arrayFold";
-    static FunctionPtr create(ContextPtr) { return std::make_shared<ArrayFold>(); }
-
-    bool isVariadic() const override { return true; }
-    size_t getNumberOfArguments() const override { return 0; }
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-
-    void getLambdaArgumentTypes(DataTypes & arguments) const override
-    {
-        if (arguments.size() < 3)
-            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires as arguments a lambda function, at least one array and an accumulator", getName());
-
-        DataTypes accumulator_and_array_types(arguments.size() - 1);
-        accumulator_and_array_types[0] = arguments.back();
-        for (size_t i = 1; i < accumulator_and_array_types.size(); ++i)
-        {
-            const auto * array_type = checkAndGetDataType<DataTypeArray>(&*arguments[i]);
-            if (!array_type)
-                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be of type Array, found {} instead", i + 1, getName(), arguments[i]->getName());
-            accumulator_and_array_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType());
-        }
-
-        const auto * lambda_function_type = checkAndGetDataType<DataTypeFunction>(arguments[0].get());
-        if (!lambda_function_type || lambda_function_type->getArgumentTypes().size() != accumulator_and_array_types.size())
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function {} must be a lambda function with {} arguments, found {} instead.",
-                            getName(), accumulator_and_array_types.size(), arguments[0]->getName());
-
-        arguments[0] = std::make_shared<DataTypeFunction>(accumulator_and_array_types);
-    }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        if (arguments.size() < 3)
-            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires as arguments a lambda function, at least one array and an accumulator", getName());
-
-        const auto * lambda_function_type = checkAndGetDataType<DataTypeFunction>(arguments[0].type.get());
-        if (!lambda_function_type)
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function", getName());
-
-        auto accumulator_type = arguments.back().type;
-        auto lambda_type = lambda_function_type->getReturnType();
-        if (!accumulator_type->equals(*lambda_type))
-            throw Exception(ErrorCodes::TYPE_MISMATCH,
-                    "Return type of lambda function must be the same as the accumulator type, inferred return type of lambda: {}, inferred type of accumulator: {}",
-                    lambda_type->getName(), accumulator_type->getName());
-
-        return accumulator_type;
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
-    {
-        const auto & lambda_function_with_type_and_name = arguments[0];
-
-        if (!lambda_function_with_type_and_name.column)
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function", getName());
-
-        const auto * lambda_function = typeid_cast<const ColumnFunction *>(lambda_function_with_type_and_name.column.get());
-        if (!lambda_function)
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function", getName());
-
-        ColumnPtr offsets_column;
-        ColumnPtr column_first_array_ptr;
-        const ColumnArray * column_first_array = nullptr;
-        ColumnsWithTypeAndName arrays;
-        arrays.reserve(arguments.size() - 1);
-
-        /// Validate input types and get input array columns in convenient form
-        for (size_t i = 1; i < arguments.size() - 1; ++i)
-        {
-            const auto & array_with_type_and_name = arguments[i];
-            ColumnPtr column_array_ptr = array_with_type_and_name.column;
-            const auto * column_array = checkAndGetColumn<ColumnArray>(column_array_ptr.get());
-            if (!column_array)
-            {
-                const ColumnConst * column_const_array = checkAndGetColumnConst<ColumnArray>(column_array_ptr.get());
-                if (!column_const_array)
-                    throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected array column, found {}", column_array_ptr->getName());
-                column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn());
-                column_array = checkAndGetColumn<ColumnArray>(column_array_ptr.get());
-            }
-
-            const DataTypePtr & array_type_ptr = array_with_type_and_name.type;
-            const auto * array_type = checkAndGetDataType<DataTypeArray>(array_type_ptr.get());
-            if (!array_type)
-                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected array type, found {}", array_type_ptr->getName());
-
-            if (!offsets_column)
-                offsets_column = column_array->getOffsetsPtr();
-            else
-            {
-                /// The first condition is optimization: do not compare data if the pointers are equal.
-                if (column_array->getOffsetsPtr() != offsets_column
-                    && column_array->getOffsets() != typeid_cast<const ColumnArray::ColumnOffsets &>(*offsets_column).getData())
-                    throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} must have equal size", getName());
-            }
-            if (i == 1)
-            {
-                column_first_array_ptr = column_array_ptr;
-                column_first_array = column_array;
-            }
-            arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(),
-                                                      recursiveRemoveLowCardinality(array_type->getNestedType()),
-                                                      array_with_type_and_name.name));
-        }
-
-        ssize_t rows_count = input_rows_count;
-        ssize_t data_row_count = arrays[0].column->size();
-        size_t array_count = arrays.size();
-
-        if (rows_count == 0)
-            return arguments.back().column->convertToFullColumnIfConst()->cloneEmpty();
-
-        ColumnPtr current_column = arguments.back().column->convertToFullColumnIfConst();
-        MutableColumnPtr result_data = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty();
-
-        size_t max_array_size = 0;
-        const auto & offsets = column_first_array->getOffsets();
-
-        IColumn::Selector selector(data_row_count);
-        size_t cur_ind = 0;
-        ssize_t cur_arr = 0;
-
-        /// skip to the first non empty array
-        if (data_row_count)
-            while (offsets[cur_arr] == 0)
-                ++cur_arr;
-
-        /// selector[i] is an index that i_th data element has in an array it corresponds to
-        for (ssize_t i = 0; i < data_row_count; ++i)
-        {
-            selector[i] = cur_ind;
-            cur_ind++;
-            if (cur_ind > max_array_size)
-                max_array_size = cur_ind;
-            while (cur_arr < rows_count && cur_ind >= offsets[cur_arr] - offsets[cur_arr - 1])
-            {
-                ++cur_arr;
-                cur_ind = 0;
-            }
-        }
-
-        std::vector<MutableColumns> data_arrays;
-        data_arrays.resize(array_count);
-
-        /// Split each data column to columns containing elements of only Nth index in array
-        if (max_array_size > 0)
-            for (size_t i = 0; i < array_count; ++i)
-                data_arrays[i] = arrays[i].column->scatter(max_array_size, selector);
-
-        size_t prev_size = rows_count;
-
-        IColumn::Permutation inverse_permutation(rows_count);
-        size_t inverse_permutation_count = 0;
-
-        /// current_column after each iteration contains value of accumulator after applying values under indexes of arrays.
-        /// At each iteration only rows of current_column with arrays that still has unapplied elements are kept.
-        /// Discarded rows which contain finished calculations are added to result_data column and as we insert them we save their original row_number in inverse_permutation vector
-        for (size_t ind = 0; ind < max_array_size; ++ind)
-        {
-            IColumn::Selector prev_selector(prev_size);
-            size_t prev_ind = 0;
-            for (ssize_t irow = 0; irow < rows_count; ++irow)
-            {
-                if (offsets[irow] - offsets[irow - 1] > ind)
-                    prev_selector[prev_ind++] = 1;
-                else if (offsets[irow] - offsets[irow - 1] == ind)
-                {
-                    inverse_permutation[inverse_permutation_count++] = irow;
-                    prev_selector[prev_ind++] = 0;
-                }
-            }
-            auto prev = current_column->scatter(2, prev_selector);
-
-            result_data->insertRangeFrom(*(prev[0]), 0, prev[0]->size());
-
-            auto res_lambda = lambda_function->cloneResized(prev[1]->size());
-            auto * res_lambda_ptr = typeid_cast<ColumnFunction *>(res_lambda.get());
-
-            res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)}));
-            for (size_t i = 0; i < array_count; i++)
-                res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(data_arrays[i][ind]), arrays[i].type, arrays[i].name)}));
-
-            current_column = IColumn::mutate(res_lambda_ptr->reduce().column);
-            prev_size = current_column->size();
-        }
-
-        result_data->insertRangeFrom(*current_column, 0, current_column->size());
-        for (ssize_t irow = 0; irow < rows_count; ++irow)
-            if (offsets[irow] - offsets[irow - 1] == max_array_size)
-                inverse_permutation[inverse_permutation_count++] = irow;
-
-        /// We have result_data containing result for every row and inverse_permutation which contains indexes of rows in input it corresponds to.
-        /// Now we need to invert inverse_permuation and apply it to result_data to get rows in right order.
-        IColumn::Permutation perm(rows_count);
-        for (ssize_t i = 0; i < rows_count; i++)
-            perm[inverse_permutation[i]] = i;
-        return result_data->permute(perm, 0);
-    }
-
-private:
-    String getName() const override
-    {
-        return name;
-    }
-};
-
-REGISTER_FUNCTION(ArrayFold)
-{
-    factory.registerFunction<ArrayFold>(FunctionDocumentation{.description=R"(
-        Function arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, accum_initial) applies lambda function to a number of equally-sized arrays
-        and collects the result in an accumulator.
-        )", .examples{{"sum", "SELECT arrayFold(x,acc -> acc+x, [1,2,3,4], toInt64(1));", "11"}}, .categories{"Array"}});
-}
-}
diff --git a/tests/performance/array_fold.xml b/tests/performance/array_fold.xml
deleted file mode 100644
index 32bd45beb1e..00000000000
--- a/tests/performance/array_fold.xml
+++ /dev/null
@@ -1,5 +0,0 @@
-<test>
-    <query>SELECT arrayFold((acc, x) -> acc + x, range(number % 100), toUInt64(0)) from numbers(100000) Format Null</query>
-    <query>SELECT arrayFold((acc, x) -> acc + 1, range(number % 100), toUInt64(0)) from numbers(100000) Format Null</query>
-    <query>SELECT arrayFold((acc, x) -> acc + x, range(number), toUInt64(0)) from numbers(10000) Format Null</query>
-</test>
diff --git a/tests/queries/0_stateless/02718_array_fold.reference b/tests/queries/0_stateless/02718_array_fold.reference
deleted file mode 100644
index 4139232d145..00000000000
--- a/tests/queries/0_stateless/02718_array_fold.reference
+++ /dev/null
@@ -1,25 +0,0 @@
-Negative tests
-Const arrays
-23
-3
-101
-[1,2,3,4]
-[4,3,2,1]
-([4,3,2,1],[1,2,3,4])
-([1,3,5],[2,4,6])
-Non-const arrays
-0
-1
-3
-6
-10
-[]
-[0]
-[1,0]
-[2,1,0]
-[3,2,1,0]
-[]
-[0]
-[1,0]
-[1,0,2]
-[3,1,0,2]
diff --git a/tests/queries/0_stateless/02718_array_fold.sql b/tests/queries/0_stateless/02718_array_fold.sql
deleted file mode 100644
index 0486a5ce2e3..00000000000
--- a/tests/queries/0_stateless/02718_array_fold.sql
+++ /dev/null
@@ -1,24 +0,0 @@
-SELECT 'Negative tests';
-SELECT arrayFold(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT arrayFold(1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT arrayFold(1, toUInt64(0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT arrayFold(1, emptyArrayUInt64(), toUInt64(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT arrayFold( acc,x -> x,  emptyArrayString(), toInt8(0)); -- { serverError TYPE_MISMATCH }
-SELECT arrayFold( acc,x -> x,  'not an array', toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT arrayFold( acc,x,y -> x,  [0, 1], 'not an array', toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT arrayFold( acc,x -> x,  [0, 1], [2, 3], toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT arrayFold( acc,x,y -> x,  [0, 1], [2, 3, 4], toUInt8(0)); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH }
-
-SELECT 'Const arrays';
-SELECT arrayFold( acc,x -> acc+x*2,  [1, 2, 3, 4], toInt64(3));
-SELECT arrayFold( acc,x -> acc+x*2,  emptyArrayInt64(), toInt64(3));
-SELECT arrayFold( acc,x,y -> acc+x*2+y*3,  [1, 2, 3, 4], [5, 6, 7, 8], toInt64(3));
-SELECT arrayFold( acc,x -> arrayPushBack(acc, x),  [1, 2, 3, 4], emptyArrayInt64());
-SELECT arrayFold( acc,x -> arrayPushFront(acc, x),  [1, 2, 3, 4], emptyArrayInt64());
-SELECT arrayFold( acc,x -> (arrayPushFront(acc.1, x),arrayPushBack(acc.2, x)),  [1, 2, 3, 4], (emptyArrayInt64(), emptyArrayInt64()));
-SELECT arrayFold( acc,x -> x%2 ? (arrayPushBack(acc.1, x), acc.2): (acc.1, arrayPushBack(acc.2, x)),  [1, 2, 3, 4, 5, 6], (emptyArrayInt64(), emptyArrayInt64()));
-
-SELECT 'Non-const arrays';
-SELECT arrayFold( acc,x -> acc+x,  range(number), number) FROM system.numbers LIMIT 5;
-SELECT arrayFold( acc,x -> arrayPushFront(acc,x),  range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 5;
-SELECT arrayFold( acc,x -> x%2 ? arrayPushFront(acc,x) : arrayPushBack(acc,x),  range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 5;

From 4c8cc4e0bd7c06dc7b2ec6124a890af45b309b21 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Thu, 14 Dec 2023 05:32:12 +0100
Subject: [PATCH 212/213] Update string-functions.md

---
 docs/en/sql-reference/functions/string-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index e9db47a5c4c..4f3c6e1e858 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -1268,7 +1268,7 @@ Type: [String](../../sql-reference/data-types/string.md).
 
 ``` sql
 SELECT decodeHTMLComponent(''CH');
-SELECT decodeHMLComponent('I&heartsuit;ClickHouse');
+SELECT decodeHTMLComponent('I&heartsuit;ClickHouse');
 ```
 
 Result:

From a0af0392cd36826cbadee499c284d4e70de16c2f Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov <mikhaylovnikitka@gmail.com>
Date: Thu, 14 Dec 2023 12:47:11 +0100
Subject: [PATCH 213/213] Random changes in random files (#57642)

---
 src/Access/Common/AccessType.h                 |  1 +
 src/Common/CurrentMetrics.cpp                  |  1 +
 src/Common/ZooKeeper/TestKeeper.cpp            | 11 +++++++++++
 src/Common/ZooKeeper/ZooKeeper.cpp             | 11 +++++++++++
 src/Common/ZooKeeper/ZooKeeper.h               | 10 ++++++++--
 src/Coordination/KeeperSnapshotManagerS3.cpp   |  5 ++++-
 src/Coordination/Standalone/Context.cpp        |  6 ++++++
 src/Coordination/Standalone/Context.h          |  7 +++++++
 src/Core/SettingsEnums.h                       |  2 ++
 src/Dictionaries/DictionaryFactory.cpp         |  6 ------
 src/Dictionaries/DictionaryFactory.h           |  5 -----
 src/Formats/MarkInCompressedFile.h             | 18 ++++++++++++------
 src/IO/ReadBufferFromS3.cpp                    |  2 ++
 src/IO/S3/PocoHTTPClientFactory.cpp            |  4 ++--
 src/IO/S3/PocoHTTPClientFactory.h              |  2 +-
 src/IO/S3/copyS3File.cpp                       |  1 +
 src/IO/S3Common.h                              |  1 +
 src/Interpreters/InterpreterSystemQuery.cpp    | 15 +++++++++++++++
 src/Interpreters/InterpreterSystemQuery.h      |  1 +
 src/Parsers/ASTSystemQuery.cpp                 |  3 ++-
 src/Parsers/ASTSystemQuery.h                   |  3 +++
 src/Parsers/ParserSystemQuery.cpp              |  9 +++++++++
 src/Server/HTTPHandler.cpp                     |  9 +++++++--
 src/Server/ReplicasStatusHandler.cpp           | 14 ++++++++++----
 src/Server/ServerType.cpp                      |  3 +++
 src/Server/ServerType.h                        |  1 +
 src/Storages/System/StorageSystemDatabases.cpp |  1 +
 src/Storages/System/StorageSystemMutations.cpp |  2 ++
 src/Storages/System/StorageSystemPartsBase.cpp |  2 ++
 .../01271_show_privileges.reference            |  1 +
 .../02117_show_create_table_system.reference   |  1 +
 31 files changed, 128 insertions(+), 30 deletions(-)

diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index f99ae2a8aea..45d427a7c55 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -183,6 +183,7 @@ enum class AccessType
     M(SYSTEM_REPLICATION_QUEUES, "SYSTEM STOP REPLICATION QUEUES, SYSTEM START REPLICATION QUEUES, STOP REPLICATION QUEUES, START REPLICATION QUEUES", TABLE, SYSTEM) \
     M(SYSTEM_DROP_REPLICA, "DROP REPLICA", TABLE, SYSTEM) \
     M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \
+    M(SYSTEM_REPLICA_READINESS, "SYSTEM REPLICA READY, SYSTEM REPLICA UNREADY", GLOBAL, SYSTEM) \
     M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \
     M(SYSTEM_RESTORE_REPLICA, "RESTORE REPLICA", TABLE, SYSTEM) \
     M(SYSTEM_WAIT_LOADING_PARTS, "WAIT LOADING PARTS", TABLE, SYSTEM) \
diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index 186771cafc2..38b14e4b0b4 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -260,6 +260,7 @@
     #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M)
 #endif
 
+
 namespace CurrentMetrics
 {
     #define M(NAME, DOCUMENTATION) extern const Metric NAME = Metric(__COUNTER__);
diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp
index a19892736ea..a25329ad7c0 100644
--- a/src/Common/ZooKeeper/TestKeeper.cpp
+++ b/src/Common/ZooKeeper/TestKeeper.cpp
@@ -99,6 +99,7 @@ struct TestKeeperExistsRequest final : ExistsRequest, TestKeeperRequest
 struct TestKeeperGetRequest final : GetRequest, TestKeeperRequest
 {
     TestKeeperGetRequest() = default;
+    explicit TestKeeperGetRequest(const GetRequest & base) : GetRequest(base) {}
     ResponsePtr createResponse() const override;
     std::pair<ResponsePtr, Undo> process(TestKeeper::Container & container, int64_t zxid) const override;
 };
@@ -118,6 +119,8 @@ struct TestKeeperSetRequest final : SetRequest, TestKeeperRequest
 
 struct TestKeeperListRequest : ListRequest, TestKeeperRequest
 {
+    TestKeeperListRequest() = default;
+    explicit TestKeeperListRequest(const ListRequest & base) : ListRequest(base) {}
     ResponsePtr createResponse() const override;
     std::pair<ResponsePtr, Undo> process(TestKeeper::Container & container, int64_t zxid) const override;
 };
@@ -176,6 +179,14 @@ struct TestKeeperMultiRequest final : MultiRequest, TestKeeperRequest
             {
                 requests.push_back(std::make_shared<TestKeeperCheckRequest>(*concrete_request_check));
             }
+            else if (const auto * concrete_request_get = dynamic_cast<const GetRequest *>(generic_request.get()))
+            {
+                requests.push_back(std::make_shared<TestKeeperGetRequest>(*concrete_request_get));
+            }
+            else if (const auto * concrete_request_list = dynamic_cast<const ListRequest *>(generic_request.get()))
+            {
+                requests.push_back(std::make_shared<TestKeeperListRequest>(*concrete_request_list));
+            }
             else
                 throw Exception::fromMessage(Error::ZBADARGUMENTS, "Illegal command as part of multi ZooKeeper request");
         }
diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index 8d18494e964..e682eaaea0d 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -497,6 +497,17 @@ bool ZooKeeper::exists(const std::string & path, Coordination::Stat * stat, cons
     return existsWatch(path, stat, callbackForEvent(watch));
 }
 
+bool ZooKeeper::anyExists(const std::vector<std::string> & paths)
+{
+    /// Query all paths with a single exists(paths) call and stop at the first
+    /// response whose error code is ZOK.
+    auto responses = exists(paths);
+    size_t idx = 0;
+    while (idx < responses.size() && responses[idx].error != Coordination::Error::ZOK)
+        ++idx;
+    return idx < responses.size();
+}
+
 bool ZooKeeper::existsWatch(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback)
 {
     Coordination::Error code = existsImpl(path, stat, watch_callback);
diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h
index 785842b94bd..73b730f60b3 100644
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@@ -286,6 +286,8 @@ public:
         return exists(paths.begin(), paths.end());
     }
 
+    bool anyExists(const std::vector<std::string> & paths);
+
     std::string get(const std::string & path, Coordination::Stat * stat = nullptr, const EventPtr & watch = nullptr);
     std::string getWatch(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback);
 
@@ -422,8 +424,9 @@ public:
     /// Performs several operations in a transaction.
     /// Throws on every error.
     Coordination::Responses multi(const Coordination::Requests & requests);
-    /// Throws only if some operation has returned an "unexpected" error
-    /// - an error that would cause the corresponding try- method to throw.
+    /// Throws only if some operation has returned an "unexpected" error - an error that would cause
+    /// the corresponding try- method to throw.
+    /// On exception, `responses` may or may not be populated.
     Coordination::Error tryMulti(const Coordination::Requests & requests, Coordination::Responses & responses);
     /// Throws nothing (even session expired errors)
     Coordination::Error tryMultiNoThrow(const Coordination::Requests & requests, Coordination::Responses & responses);
@@ -567,8 +570,11 @@ public:
     void setZooKeeperLog(std::shared_ptr<DB::ZooKeeperLog> zk_log_);
 
     UInt32 getSessionUptime() const { return static_cast<UInt32>(session_uptime.elapsedSeconds()); }
+
     bool hasReachedDeadline() const { return impl->hasReachedDeadline(); }
 
+    uint64_t getSessionTimeoutMS() const { return args.session_timeout_ms; }
+
     void setServerCompletelyStarted();
 
     Int8 getConnectedHostIdx() const;
diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp
index 56f64d58e2f..d76e310f2a3 100644
--- a/src/Coordination/KeeperSnapshotManagerS3.cpp
+++ b/src/Coordination/KeeperSnapshotManagerS3.cpp
@@ -208,6 +208,9 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
             return;
         }
 
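+        /// Plain reference alias for use inside the SCOPE_EXIT block below, so that it does not refer to a binding (presumably a structured binding) directly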
+        const auto & snapshot_path_ref = snapshot_path;
+
         SCOPE_EXIT(
         {
             LOG_INFO(log, "Removing lock file");
@@ -223,7 +226,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
             }
             catch (...)
             {
-                LOG_INFO(log, "Failed to delete lock file for {} from S3", snapshot_file_info.path);
+                LOG_INFO(log, "Failed to delete lock file for {} from S3", snapshot_path_ref);
                 tryLogCurrentException(__PRETTY_FUNCTION__);
             }
         });
diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp
index 97a034b22a4..374610769c4 100644
--- a/src/Coordination/Standalone/Context.cpp
+++ b/src/Coordination/Standalone/Context.cpp
@@ -35,6 +35,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int UNSUPPORTED_METHOD;
 }
 
 struct ContextSharedPart : boost::noncopyable
@@ -376,4 +377,9 @@ void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::Abstr
     shared->keeper_dispatcher->updateConfiguration(getConfigRef(), getMacros());
 }
 
+zkutil::ZooKeeperPtr Context::getZooKeeper() const
+{   /// Always throws UNSUPPORTED_METHOD; no ZooKeeper client is ever returned here.
+    throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper");
+}
+
 }
diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h
index 5aecf850d7c..a6199864422 100644
--- a/src/Coordination/Standalone/Context.h
+++ b/src/Coordination/Standalone/Context.h
@@ -21,6 +21,11 @@
 #include <memory>
 
 #include "config.h"
+namespace zkutil
+{
+    class ZooKeeper; /// Forward declaration; this header only needs the name to form a pointer type.
+    using ZooKeeperPtr = std::shared_ptr<ZooKeeper>; /// Shared-ownership handle, used as the return type of Context::getZooKeeper().
+}
 
 namespace DB
 {
@@ -153,6 +158,8 @@ public:
     void initializeKeeperDispatcher(bool start_async) const;
     void shutdownKeeperDispatcher() const;
     void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config);
+
+    zkutil::ZooKeeperPtr getZooKeeper() const;
 };
 
 }
diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h
index 3d087d42386..7977a0b3ab6 100644
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@@ -133,6 +133,8 @@ enum class DefaultTableEngine
     ReplacingMergeTree,
     ReplicatedMergeTree,
     ReplicatedReplacingMergeTree,
+    SharedMergeTree,
+    SharedReplacingMergeTree,
     Memory,
 };
 
diff --git a/src/Dictionaries/DictionaryFactory.cpp b/src/Dictionaries/DictionaryFactory.cpp
index c3102632167..f6102d7c657 100644
--- a/src/Dictionaries/DictionaryFactory.cpp
+++ b/src/Dictionaries/DictionaryFactory.cpp
@@ -69,12 +69,6 @@ DictionaryPtr DictionaryFactory::create(
         layout_type);
 }
 
-DictionaryPtr DictionaryFactory::create(const std::string & name, const ASTCreateQuery & ast, ContextPtr global_context) const
-{
-    auto configuration = getDictionaryConfigurationFromAST(ast, global_context);
-    return DictionaryFactory::create(name, *configuration, "dictionary", global_context, true);
-}
-
 bool DictionaryFactory::isComplex(const std::string & layout_type) const
 {
     auto it = registered_layouts.find(layout_type);
diff --git a/src/Dictionaries/DictionaryFactory.h b/src/Dictionaries/DictionaryFactory.h
index 35097a5ed24..2834451df81 100644
--- a/src/Dictionaries/DictionaryFactory.h
+++ b/src/Dictionaries/DictionaryFactory.h
@@ -39,11 +39,6 @@ public:
         ContextPtr global_context,
         bool created_from_ddl) const;
 
-    /// Create dictionary from DDL-query
-    DictionaryPtr create(const std::string & name,
-        const ASTCreateQuery & ast,
-        ContextPtr global_context) const;
-
     using LayoutCreateFunction = std::function<DictionaryPtr(
         const std::string & name,
         const DictionaryStructure & dict_struct,
diff --git a/src/Formats/MarkInCompressedFile.h b/src/Formats/MarkInCompressedFile.h
index 08e4f182c45..a25033e2a14 100644
--- a/src/Formats/MarkInCompressedFile.h
+++ b/src/Formats/MarkInCompressedFile.h
@@ -10,6 +10,13 @@
 namespace DB
 {
 
+/// Workaround for a clang bug: the three-way comparison operator used below triggers -Wzero-as-null-pointer-constant.
+/// See https://github.com/llvm/llvm-project/issues/55919
+#ifdef __clang__
+    #pragma clang diagnostic push
+    #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+
 /** Mark is the position in the compressed file. The compressed file consists of adjacent compressed blocks.
   * Mark is a tuple - the offset in the file to the start of the compressed block, the offset in the decompressed block to the start of the data.
   */
@@ -18,12 +25,7 @@ struct MarkInCompressedFile
     size_t offset_in_compressed_file;
     size_t offset_in_decompressed_block;
 
-    bool operator==(const MarkInCompressedFile & rhs) const
-    {
-        return std::tie(offset_in_compressed_file, offset_in_decompressed_block)
-            == std::tie(rhs.offset_in_compressed_file, rhs.offset_in_decompressed_block);
-    }
-    bool operator!=(const MarkInCompressedFile & rhs) const { return !(*this == rhs); }
+    auto operator<=>(const MarkInCompressedFile &) const = default;
 
     auto asTuple() const { return std::make_tuple(offset_in_compressed_file, offset_in_decompressed_block); }
 
@@ -39,6 +41,10 @@ struct MarkInCompressedFile
     }
 };
 
+#ifdef __clang__
+    #pragma clang diagnostic pop
+#endif
+
 /**
  * In-memory representation of an array of marks.
  *
diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp
index c9c9319c44c..36cac929e3f 100644
--- a/src/IO/ReadBufferFromS3.cpp
+++ b/src/IO/ReadBufferFromS3.cpp
@@ -515,7 +515,9 @@ Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t attempt, si
     // We do not know in advance how many bytes we are going to consume, to avoid blocking estimated it from below
     constexpr ResourceCost estimated_cost = 1;
     ResourceGuard rlock(read_settings.resource_link, estimated_cost);
+
     Aws::S3::Model::GetObjectOutcome outcome = client_ptr->GetObject(req);
+
     rlock.unlock();
 
     if (outcome.IsSuccess())
diff --git a/src/IO/S3/PocoHTTPClientFactory.cpp b/src/IO/S3/PocoHTTPClientFactory.cpp
index 9dd52a263b0..ef7af2d01ba 100644
--- a/src/IO/S3/PocoHTTPClientFactory.cpp
+++ b/src/IO/S3/PocoHTTPClientFactory.cpp
@@ -13,9 +13,9 @@
 namespace DB::S3
 {
 std::shared_ptr<Aws::Http::HttpClient>
-PocoHTTPClientFactory::CreateHttpClient(const Aws::Client::ClientConfiguration & clientConfiguration) const
+PocoHTTPClientFactory::CreateHttpClient(const Aws::Client::ClientConfiguration & client_configuration) const
 {
-    return std::make_shared<PocoHTTPClient>(static_cast<const PocoHTTPClientConfiguration &>(clientConfiguration));
+    return std::make_shared<PocoHTTPClient>(static_cast<const PocoHTTPClientConfiguration &>(client_configuration));
 }
 
 std::shared_ptr<Aws::Http::HttpRequest> PocoHTTPClientFactory::CreateHttpRequest(
diff --git a/src/IO/S3/PocoHTTPClientFactory.h b/src/IO/S3/PocoHTTPClientFactory.h
index 4e555f05502..60704332e7b 100644
--- a/src/IO/S3/PocoHTTPClientFactory.h
+++ b/src/IO/S3/PocoHTTPClientFactory.h
@@ -15,7 +15,7 @@ class PocoHTTPClientFactory : public Aws::Http::HttpClientFactory
 public:
     ~PocoHTTPClientFactory() override = default;
     [[nodiscard]] std::shared_ptr<Aws::Http::HttpClient>
-    CreateHttpClient(const Aws::Client::ClientConfiguration & clientConfiguration) const override;
+    CreateHttpClient(const Aws::Client::ClientConfiguration & client_configuration) const override;
     [[nodiscard]] std::shared_ptr<Aws::Http::HttpRequest>
     CreateHttpRequest(const Aws::String & uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory & streamFactory) const override;
     [[nodiscard]] std::shared_ptr<Aws::Http::HttpRequest>
diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp
index 819c345938d..25de61360fe 100644
--- a/src/IO/S3/copyS3File.cpp
+++ b/src/IO/S3/copyS3File.cpp
@@ -655,6 +655,7 @@ namespace
 
         void performCopy()
         {
+            LOG_TEST(log, "Copy object {} to {} using native copy", src_key, dest_key);
             if (!supports_multipart_copy || size <= upload_settings.max_single_operation_copy_size)
                 performSingleOperationCopy();
             else
diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h
index 71d52c727c7..8c45c1c34a7 100644
--- a/src/IO/S3Common.h
+++ b/src/IO/S3Common.h
@@ -16,6 +16,7 @@
 #include <Common/Throttler_fwd.h>
 
 #include <IO/S3/URI.h>
+#include <IO/S3/Credentials.h>
 
 #include <aws/core/Aws.h>
 #include <aws/s3/S3Errors.h>
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index db10d377cc1..e3b0c2df567 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -45,6 +45,7 @@
 #include <Access/Common/AllowedClientHosts.h>
 #include <Databases/IDatabase.h>
 #include <Databases/DatabaseReplicated.h>
+#include <Disks/ObjectStorages/IMetadataStorage.h>
 #include <Storages/StorageDistributed.h>
 #include <Storages/StorageReplicatedMergeTree.h>
 #include <Storages/Freeze.h>
@@ -92,6 +93,7 @@ namespace ErrorCodes
     extern const int TIMEOUT_EXCEEDED;
     extern const int TABLE_WAS_NOT_DROPPED;
     extern const int ABORTED;
+    extern const int SUPPORT_IS_DISABLED;
 }
 
 
@@ -442,6 +444,10 @@ BlockIO InterpreterSystemQuery::execute()
             result.pipeline = QueryPipeline(std::move(source));
             break;
         }
+        case Type::DROP_DISK_METADATA_CACHE:
+        {
+            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented");
+        }
         case Type::DROP_SCHEMA_CACHE:
         {
             getContext()->checkAccess(AccessType::SYSTEM_DROP_SCHEMA_CACHE);
@@ -611,6 +617,10 @@ BlockIO InterpreterSystemQuery::execute()
         case Type::SYNC_DATABASE_REPLICA:
             syncReplicatedDatabase(query);
             break;
+        case Type::REPLICA_UNREADY:
+            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented");
+        case Type::REPLICA_READY:
+            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented");
         case Type::SYNC_TRANSACTION_LOG:
             syncTransactionLog();
             break;
@@ -1119,6 +1129,8 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
             required_access.emplace_back(AccessType::SYSTEM_DROP_CACHE);
             break;
         }
+        case Type::DROP_DISK_METADATA_CACHE:
+            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented");
         case Type::RELOAD_DICTIONARY:
         case Type::RELOAD_DICTIONARIES:
         case Type::RELOAD_EMBEDDED_DICTIONARIES:
@@ -1245,6 +1257,9 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
             required_access.emplace_back(AccessType::SYSTEM_SYNC_REPLICA, query.getDatabase(), query.getTable());
             break;
         }
+        case Type::REPLICA_READY:
+        case Type::REPLICA_UNREADY:
+            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented");
         case Type::RESTART_REPLICA:
         {
             required_access.emplace_back(AccessType::SYSTEM_RESTART_REPLICA, query.getDatabase(), query.getTable());
diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h
index 826d4cc0c69..462449623d0 100644
--- a/src/Interpreters/InterpreterSystemQuery.h
+++ b/src/Interpreters/InterpreterSystemQuery.h
@@ -57,6 +57,7 @@ private:
     void restartReplica(const StorageID & replica, ContextMutablePtr system_context);
     void restartReplicas(ContextMutablePtr system_context);
     void syncReplica(ASTSystemQuery & query);
+    void setReplicaReadiness(bool ready);
     void waitLoadingParts();
 
     void syncReplicatedDatabase(ASTSystemQuery & query);
diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp
index 77235dfb6c2..c005d49a93d 100644
--- a/src/Parsers/ASTSystemQuery.cpp
+++ b/src/Parsers/ASTSystemQuery.cpp
@@ -179,7 +179,8 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
             || type == Type::RELOAD_DICTIONARY
             || type == Type::RELOAD_MODEL
             || type == Type::RELOAD_FUNCTION
-            || type == Type::RESTART_DISK)
+            || type == Type::RESTART_DISK
+            || type == Type::DROP_DISK_METADATA_CACHE)
     {
         if (table)
         {
diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h
index 5f7ba5be330..8e6100fe7b4 100644
--- a/src/Parsers/ASTSystemQuery.h
+++ b/src/Parsers/ASTSystemQuery.h
@@ -32,6 +32,7 @@ public:
         DROP_COMPILED_EXPRESSION_CACHE,
 #endif
         DROP_FILESYSTEM_CACHE,
+        DROP_DISK_METADATA_CACHE,
         DROP_SCHEMA_CACHE,
         DROP_FORMAT_SCHEMA_CACHE,
 #if USE_AWS_S3
@@ -49,6 +50,8 @@ public:
         SYNC_DATABASE_REPLICA,
         SYNC_TRANSACTION_LOG,
         SYNC_FILE_CACHE,
+        REPLICA_READY,
+        REPLICA_UNREADY,
         RELOAD_DICTIONARY,
         RELOAD_DICTIONARIES,
         RELOAD_MODEL,
diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp
index 2f6a1142a8f..2e1283187d3 100644
--- a/src/Parsers/ParserSystemQuery.cpp
+++ b/src/Parsers/ParserSystemQuery.cpp
@@ -12,6 +12,11 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int SUPPORT_IS_DISABLED;
+}
+
 [[nodiscard]] static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr<ASTSystemQuery> & res, IParser::Pos & pos,
                                                  Expected & expected, bool require_table, bool allow_string_literal)
 {
@@ -427,6 +432,10 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
                 return false;
             break;
         }
+        case Type::DROP_DISK_METADATA_CACHE:
+        {
+            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented");
+        }
         case Type::DROP_SCHEMA_CACHE:
         {
             if (ParserKeyword{"FOR"}.ignore(pos, expected))
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index f9cd3b40f4a..ffa8c11fa44 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -1061,8 +1061,13 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
             response.setChunkedTransferEncoding(true);
 
         HTMLForm params(default_settings, request);
-        with_stacktrace = params.getParsed<bool>("stacktrace", false);
-        close_session = params.getParsed<bool>("close_session", false);
+
+        if (params.getParsed<bool>("stacktrace", false) && server.config().getBool("enable_http_stacktrace", true))
+            with_stacktrace = true;
+
+        if (params.getParsed<bool>("close_session", false) && server.config().getBool("enable_http_close_session", true))
+            close_session = true;
+
         if (close_session)
             session_id = params.get("session_id");
 
diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp
index ad54b24f31d..2b7a52572a8 100644
--- a/src/Server/ReplicasStatusHandler.cpp
+++ b/src/Server/ReplicasStatusHandler.cpp
@@ -28,11 +28,17 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe
     {
         HTMLForm params(getContext()->getSettingsRef(), request);
 
-        /// Even if lag is small, output detailed information about the lag.
-        bool verbose = params.get("verbose", "") == "1";
+        const auto & config = getContext()->getConfigRef();
 
         const MergeTreeSettings & settings = getContext()->getReplicatedMergeTreeSettings();
 
+        /// Even if lag is small, output detailed information about the lag.
+        bool verbose = false;
+        bool enable_verbose = config.getBool("enable_verbose_replicas_status", true);
+
+        if (params.get("verbose", "") == "1" && enable_verbose)
+            verbose = true;
+
         bool ok = true;
         WriteBufferFromOwnString message;
 
@@ -78,13 +84,13 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe
             }
         }
 
-        const auto & config = getContext()->getConfigRef();
         setResponseDefaultHeaders(response, config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT));
 
         if (!ok)
         {
             response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_SERVICE_UNAVAILABLE);
-            verbose = true;
+            if (enable_verbose)
+                verbose = true;
         }
 
         if (verbose)
diff --git a/src/Server/ServerType.cpp b/src/Server/ServerType.cpp
index fb052e7d6e6..b0511632e6e 100644
--- a/src/Server/ServerType.cpp
+++ b/src/Server/ServerType.cpp
@@ -144,6 +144,9 @@ bool ServerType::shouldStop(const std::string & port_name) const
         port_custom_name = port_name.substr(protocols_size, port_name.size() - protocols_size - ports_size + 1);
     }
 
+    else if (port_name == "cloud.port")
+        port_type = Type::CLOUD;
+
     else
         return false;
 
diff --git a/src/Server/ServerType.h b/src/Server/ServerType.h
index 07bb74ea009..c31fb663811 100644
--- a/src/Server/ServerType.h
+++ b/src/Server/ServerType.h
@@ -26,6 +26,7 @@ public:
         QUERIES_ALL,
         QUERIES_DEFAULT,
         QUERIES_CUSTOM,
+        CLOUD,
         END
     };
 
diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp
index 1fa94fab7bf..6dbe780193d 100644
--- a/src/Storages/System/StorageSystemDatabases.cpp
+++ b/src/Storages/System/StorageSystemDatabases.cpp
@@ -9,6 +9,7 @@
 #include <Storages/VirtualColumnUtils.h>
 #include <Parsers/ASTCreateQuery.h>
 #include <Common/logger_useful.h>
+#include <Parsers/formatAST.h>
 
 
 namespace DB
diff --git a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp
index fa521c632b8..b250765c4d2 100644
--- a/src/Storages/System/StorageSystemMutations.cpp
+++ b/src/Storages/System/StorageSystemMutations.cpp
@@ -28,6 +28,7 @@ NamesAndTypesList StorageSystemMutations::getNamesAndTypes()
         { "parts_to_do_names",          std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()) },
         { "parts_to_do",                std::make_shared<DataTypeInt64>() },
         { "is_done",                    std::make_shared<DataTypeUInt8>() },
+        { "is_killed",                  std::make_shared<DataTypeUInt8>() },
         { "latest_failed_part",         std::make_shared<DataTypeString>() },
         { "latest_fail_time",           std::make_shared<DataTypeDateTime>() },
         { "latest_fail_reason",         std::make_shared<DataTypeString>() },
@@ -138,6 +139,7 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr c
             res_columns[col_num++]->insert(parts_to_do_names);
             res_columns[col_num++]->insert(parts_to_do_names.size());
             res_columns[col_num++]->insert(status.is_done);
+            res_columns[col_num++]->insert(status.is_killed);
             res_columns[col_num++]->insert(status.latest_failed_part);
             res_columns[col_num++]->insert(UInt64(status.latest_fail_time));
             res_columns[col_num++]->insert(status.latest_fail_reason);
diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp
index 513af6cfc46..8d2e2900722 100644
--- a/src/Storages/System/StorageSystemPartsBase.cpp
+++ b/src/Storages/System/StorageSystemPartsBase.cpp
@@ -285,6 +285,8 @@ StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, Name
 
     auto add_alias = [&](const String & alias_name, const String & column_name)
     {
+        if (!tmp_columns.has(column_name))
+            return;
         ColumnDescription column(alias_name, tmp_columns.get(column_name).type);
         column.default_desc.kind = ColumnDefaultKind::Alias;
         column.default_desc.expression = std::make_shared<ASTIdentifier>(column_name);
diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference
index 6c639926aac..e2c0655b2bc 100644
--- a/tests/queries/0_stateless/01271_show_privileges.reference
+++ b/tests/queries/0_stateless/01271_show_privileges.reference
@@ -133,6 +133,7 @@ SYSTEM SENDS	['SYSTEM STOP SENDS','SYSTEM START SENDS','STOP SENDS','START SENDS
 SYSTEM REPLICATION QUEUES	['SYSTEM STOP REPLICATION QUEUES','SYSTEM START REPLICATION QUEUES','STOP REPLICATION QUEUES','START REPLICATION QUEUES']	TABLE	SYSTEM
 SYSTEM DROP REPLICA	['DROP REPLICA']	TABLE	SYSTEM
 SYSTEM SYNC REPLICA	['SYNC REPLICA']	TABLE	SYSTEM
+SYSTEM REPLICA READINESS	['SYSTEM REPLICA READY','SYSTEM REPLICA UNREADY']	GLOBAL	SYSTEM
 SYSTEM RESTART REPLICA	['RESTART REPLICA']	TABLE	SYSTEM
 SYSTEM RESTORE REPLICA	['RESTORE REPLICA']	TABLE	SYSTEM
 SYSTEM WAIT LOADING PARTS	['WAIT LOADING PARTS']	TABLE	SYSTEM
diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference
index 9ed905a0df8..2e9d733aeb3 100644
--- a/tests/queries/0_stateless/02117_show_create_table_system.reference
+++ b/tests/queries/0_stateless/02117_show_create_table_system.reference
@@ -406,6 +406,7 @@ CREATE TABLE system.mutations
     `parts_to_do_names` Array(String),
     `parts_to_do` Int64,
     `is_done` UInt8,
+    `is_killed` UInt8,
     `latest_failed_part` String,
     `latest_fail_time` DateTime,
     `latest_fail_reason` String